Enhanced LexerError and documented the types
Enhanced `LexerError` so that it can now wrap an underlying Rust `Error` as its source. This lets us wrap up the possible I/O errors we get when trying to open a file and read its contents. Also added documentation for all of the implemented functions.
This commit is contained in:
parent
31290cc86f
commit
a926e08061
156
src/error.rs
156
src/error.rs
@ -1,59 +1,127 @@
|
||||
use std::{error::Error, path::PathBuf};
|
||||
use super::position::Span;
|
||||
|
||||
|
||||
/// Represents an error encountered during lexical analysis.
|
||||
///
|
||||
/// `LexerError` contains contextual information about where the error
|
||||
/// occurred in the source input, an optional offending snippet,
|
||||
/// the file path (if applicable), and an optional underlying error
|
||||
/// that triggered the failure.
|
||||
///
|
||||
/// It is designed to provide detailed diagnostics for file-based or
|
||||
/// in-memory parsing and is compatible with error reporting ecosystems.
|
||||
#[derive(Debug)]
|
||||
pub struct LexerError {
|
||||
/// A human-readable error message.
|
||||
pub message: String,
|
||||
|
||||
/// An error that has occured during lexigraphical analysis.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct LexerError
|
||||
{
|
||||
/// A human-readable error message.
|
||||
pub message: String,
|
||||
/// The span where the error occurred.
|
||||
pub span: Span,
|
||||
|
||||
/// The start and end of where the error is located in the file.
|
||||
pub span: Span,
|
||||
/// The file that the error occurred in, if known.
|
||||
pub file: Option<PathBuf>,
|
||||
|
||||
/// The file that the error occured within.
|
||||
pub file: Option<std::path::PathBuf>,
|
||||
/// The source snippet related to the error, if known.
|
||||
pub snippet: Option<String>,
|
||||
|
||||
/// The problematic string (optional).
|
||||
pub snippet: Option<String>
|
||||
/// An optional underlying error that caused this one.
|
||||
pub source: Option<Box<dyn Error>>,
|
||||
}
|
||||
|
||||
impl LexerError {
|
||||
/// Creates a new `LexerError` with a message, span, and optional context.
|
||||
///
|
||||
/// # Parameters
|
||||
/// - `message`: A human-readable explanation of the error.
|
||||
/// - `span`: The region in the source where the error occurred.
|
||||
/// - `file`: An optional path to the file in which the error occurred.
|
||||
/// - `snippet`: An optional problematic input string.
|
||||
///
|
||||
/// # Returns
|
||||
/// A new instance of `LexerError`.
|
||||
pub fn new<S, T>(
|
||||
message: S,
|
||||
span: Span,
|
||||
file: Option<T>,
|
||||
snippet: Option<S>,
|
||||
) -> Self
|
||||
where
|
||||
S: Into<String>,
|
||||
T: Into<PathBuf>,
|
||||
{
|
||||
LexerError {
|
||||
message: message.into(),
|
||||
span,
|
||||
file: file.map(Into::into),
|
||||
snippet: snippet.map(Into::into),
|
||||
source: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a `LexerError` from only a message and span.
|
||||
///
|
||||
/// This is useful when file or snippet context is not available.
|
||||
pub fn from_message<S>(message: S, span: Span) -> Self
|
||||
where
|
||||
S: Into<String>,
|
||||
{
|
||||
Self::new(message, span, None::<PathBuf>, None::<S>)
|
||||
}
|
||||
|
||||
impl LexerError
|
||||
{
|
||||
pub fn new<S, T>(message: S, span: Span, file: Option<T>,
|
||||
snippet: Option<S>)
|
||||
-> Self
|
||||
where S: Into<String>,
|
||||
T: Into<std::path::PathBuf>
|
||||
{
|
||||
LexerError { message: message.into(),
|
||||
span,
|
||||
file: file.map(|t| t.into()),
|
||||
snippet: snippet.map(|s| s.into()) }
|
||||
}
|
||||
/// Attaches a snippet of the offending source code.
|
||||
///
|
||||
/// This is helpful for diagnostics and tooling output.
|
||||
pub fn with_snippet<S>(mut self, snippet: S) -> Self
|
||||
where
|
||||
S: Into<String>,
|
||||
{
|
||||
self.snippet = Some(snippet.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Attaches the path of the file in which the error occurred.
|
||||
pub fn with_file<T>(mut self, file: T) -> Self
|
||||
where
|
||||
T: Into<PathBuf>,
|
||||
{
|
||||
self.file = Some(file.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Wraps a source error that caused this `LexerError`.
|
||||
///
|
||||
/// This allows you to chain errors for more detailed diagnostics.
|
||||
pub fn with_source<E>(mut self, err: E) -> Self
|
||||
where
|
||||
E: Error + 'static,
|
||||
{
|
||||
self.source = Some(Box::new(err));
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for LexerError
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result
|
||||
{
|
||||
match &self.snippet
|
||||
{
|
||||
Some(snippet) =>
|
||||
{
|
||||
write!(f,
|
||||
"LexerError at {}: {} (snippet: '{}')",
|
||||
self.span, self.message, snippet)
|
||||
}
|
||||
None =>
|
||||
{
|
||||
write!(f, "LexerError at {}: {}", self.span, self.message)
|
||||
}
|
||||
}
|
||||
}
|
||||
impl std::fmt::Display for LexerError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Lexer error at {}", self.span)?;
|
||||
|
||||
if let Some(file) = &self.file {
|
||||
write!(f, " in file `{}`", file.display())?;
|
||||
}
|
||||
|
||||
write!(f, ": {}", self.message)?;
|
||||
|
||||
if let Some(snippet) = &self.snippet {
|
||||
write!(f, "\n --> Snippet: `{}`", snippet)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for LexerError {}
|
||||
impl Error for LexerError {
|
||||
/// Returns the underlying cause of this error, if any.
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
self.source.as_ref().map(|e| e.as_ref())
|
||||
}
|
||||
}
|
||||
|
12
src/lexer.rs
12
src/lexer.rs
@ -72,13 +72,12 @@ impl Lexer
|
||||
let mut cursor = Position::default();
|
||||
let mut stream = TokenStream::new();
|
||||
|
||||
let input_file = File::open(&path).map_err(|_error| {
|
||||
let input_file = File::open(&path).map_err(|err| {
|
||||
LexerError::new(
|
||||
"Unable to open file for Lexigraphical Analysis.",
|
||||
"Unable to open file for Lexical Analysis.",
|
||||
Span::default(),
|
||||
Some(path.as_ref().to_string_lossy().to_string()),
|
||||
None,
|
||||
)
|
||||
Some(path.as_ref().to_path_buf()),
|
||||
None).with_source(err)
|
||||
})?;
|
||||
|
||||
let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
|
||||
@ -138,7 +137,8 @@ impl Lexer
|
||||
}
|
||||
|
||||
/// Internal: scans a single line of text into tokens.
|
||||
fn scan(line: &str, stream: &mut TokenStream<TokenType>, cursor: &mut Position)
|
||||
fn scan(line: &str, stream: &mut TokenStream<TokenType>,
|
||||
cursor: &mut Position)
|
||||
{
|
||||
for c in line.chars()
|
||||
{
|
||||
|
@ -25,6 +25,14 @@ pub struct Span
|
||||
|
||||
impl Position
|
||||
{
|
||||
/// Creates a new `Position` with the given line and column.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `line` - The line number in the source (0-based).
|
||||
/// * `column` - The column number within the line (0-based).
|
||||
///
|
||||
/// # Returns
|
||||
/// A `Position` representing the specified line and column.
|
||||
pub fn new(line: usize, column: usize) -> Self
|
||||
{
|
||||
Position { line, column }
|
||||
@ -51,29 +59,66 @@ impl std::fmt::Display for Position
|
||||
|
||||
impl Span
|
||||
{
|
||||
/// Creates a new `Span` from a starting and ending `Position`.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `start` - The starting position of the span.
|
||||
/// * `end` - The ending position of the span.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `Span` covering the range from `start` to `end`.
|
||||
pub fn new(start: Position, end: Position) -> Self
|
||||
{
|
||||
Span { start, end }
|
||||
}
|
||||
|
||||
/// Creates a `Span` that covers a single position.
|
||||
///
|
||||
/// Useful for zero-length spans or pinpointing a specific token or
|
||||
/// character.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `val` - The position to be used as both start and end.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `Span` that starts and ends at `val`.
|
||||
pub fn with_single(val: Position) -> Self
|
||||
{
|
||||
Span { start: val,
|
||||
end: val }
|
||||
}
|
||||
|
||||
/// Merges two spans into one, taking the start of the first
|
||||
/// and the end of the second.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `a` - The first span.
|
||||
/// * `b` - The second span.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `Span` that starts at `a.start` and ends at `b.end`.
|
||||
///
|
||||
/// # Note
|
||||
/// Assumes that `a` comes before `b` in source order.
|
||||
pub fn merge(a: Span, b: Span) -> Self
|
||||
{
|
||||
Span { start: a.start,
|
||||
end: b.end }
|
||||
}
|
||||
|
||||
pub fn merge_with(&self, other: Span) -> Span {
|
||||
Span {
|
||||
start: self.start,
|
||||
end: other.end,
|
||||
}
|
||||
}
|
||||
/// Merges this span with another, producing a new span
|
||||
/// from `self.start` to `other.end`.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `other` - Another span to merge with.
|
||||
///
|
||||
/// # Returns
|
||||
/// A new `Span` from the start of `self` to the end of `other`.
|
||||
pub fn merge_with(&self, other: Span) -> Span
|
||||
{
|
||||
Span { start: self.start,
|
||||
end: other.end }
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Span
|
||||
|
49
src/token.rs
49
src/token.rs
@ -120,6 +120,10 @@ pub struct TokenStreamIterMut<'a, T>
|
||||
|
||||
impl<T> TokenStream<T>
|
||||
{
|
||||
/// Creates a new, empty `TokenStream`.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `TokenStream` with no tokens.
|
||||
pub fn new() -> Self
|
||||
{
|
||||
TokenStream { lexemes: Vec::new(),
|
||||
@ -127,16 +131,31 @@ impl<T> TokenStream<T>
|
||||
locations: Vec::new() }
|
||||
}
|
||||
|
||||
/// Returns the number of tokens in the stream.
|
||||
///
|
||||
/// # Returns
|
||||
/// The number of tokens currently stored.
|
||||
pub fn len(&self) -> usize
|
||||
{
|
||||
self.lexemes.len()
|
||||
}
|
||||
|
||||
/// Checks if the token stream is empty.
|
||||
///
|
||||
/// # Returns
|
||||
/// `true` if there are no tokens, `false` otherwise.
|
||||
pub fn is_empty(&self) -> bool
|
||||
{
|
||||
self.lexemes.is_empty()
|
||||
}
|
||||
|
||||
/// Retrieves an immutable reference to the token at the given index.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `index` - The position of the token in the stream.
|
||||
///
|
||||
/// # Returns
|
||||
/// `Some(Token)` if the index is valid, otherwise `None`.
|
||||
pub fn get(&self, index: usize) -> Option<Token<'_, T>>
|
||||
{
|
||||
if index < self.lexemes.len()
|
||||
@ -151,12 +170,27 @@ impl<T> TokenStream<T>
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over immutable references to the tokens.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `TokenStreamIter` iterator for the stream.
|
||||
pub fn iter(&self) -> TokenStreamIter<'_, T>
|
||||
{
|
||||
TokenStreamIter { stream: self,
|
||||
index: 0 }
|
||||
}
|
||||
|
||||
/// Retrieves a mutable reference to the token at the given index.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `index` - The position of the token in the stream.
|
||||
///
|
||||
/// # Returns
|
||||
/// `Some(TokenMut)` if the index is valid, otherwise `None`.
|
||||
///
|
||||
/// # Safety
|
||||
/// Uses `unsafe` to split borrows for individual mutable access
|
||||
/// to components of the token without violating Rust’s aliasing rules.
|
||||
pub fn get_mut(&mut self, index: usize) -> Option<TokenMut<'_, T>>
|
||||
{
|
||||
if index < self.lexemes.len()
|
||||
@ -167,7 +201,6 @@ impl<T> TokenStream<T>
|
||||
let variant = &mut self.variants[index] as *mut T;
|
||||
let span = &mut self.locations[index] as *mut Span;
|
||||
|
||||
// Convert &mut String to &mut str safely.
|
||||
unsafe {
|
||||
Some(TokenMut { lexeme: &mut *lexeme.as_mut()
|
||||
.unwrap()
|
||||
@ -182,6 +215,9 @@ impl<T> TokenStream<T>
|
||||
}
|
||||
}
|
||||
|
||||
/// Clears all tokens from the stream.
|
||||
///
|
||||
/// This resets the stream to an empty state.
|
||||
pub fn clear(&mut self)
|
||||
{
|
||||
self.lexemes.clear();
|
||||
@ -189,6 +225,12 @@ impl<T> TokenStream<T>
|
||||
self.locations.clear();
|
||||
}
|
||||
|
||||
/// Pushes a new token onto the end of the stream.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `lexeme` - The text of the token.
|
||||
/// * `variant` - The token type or value.
|
||||
/// * `span` - The location of the token in the source.
|
||||
pub fn push(&mut self, lexeme: String, variant: T, span: Span)
|
||||
{
|
||||
self.lexemes.push(lexeme);
|
||||
@ -196,6 +238,10 @@ impl<T> TokenStream<T>
|
||||
self.locations.push(span);
|
||||
}
|
||||
|
||||
/// Returns a mutable iterator over the tokens in the stream.
|
||||
///
|
||||
/// # Returns
|
||||
/// A `TokenStreamIterMut` for mutable access to each token component.
|
||||
pub fn iter_mut(&mut self) -> TokenStreamIterMut<'_, T>
|
||||
{
|
||||
TokenStreamIterMut { lexemes: self.lexemes.iter_mut(),
|
||||
@ -204,7 +250,6 @@ impl<T> TokenStream<T>
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl<'a, T> IntoIterator for &'a TokenStream<T>
|
||||
{
|
||||
type IntoIter = TokenStreamIter<'a, T>;
|
||||
|
Loading…
x
Reference in New Issue
Block a user