diff --git a/src/error.rs b/src/error.rs index 62bd5f0..6d0615f 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,59 +1,127 @@ +use std::{error::Error, path::PathBuf}; use super::position::Span; +/// Represents an error encountered during lexical analysis. +/// +/// `LexerError` contains contextual information about where the error +/// occurred in the source input, an optional offending snippet, +/// the file path (if applicable), and an optional underlying error +/// that triggered the failure. +/// +/// It is designed to provide detailed diagnostics for file-based or +/// in-memory parsing and is compatible with error reporting ecosystems. +#[derive(Debug)] +pub struct LexerError { + /// A human-readable error message. + pub message: String, -/// An error that has occured during lexigraphical analysis. -#[derive(Debug, Clone, PartialEq)] -pub struct LexerError -{ - /// A human-readable error message. - pub message: String, + /// The span where the error occurred. + pub span: Span, - /// The start and end of where the error is located in the file. - pub span: Span, + /// The file that the error occurred in, if known. + pub file: Option<PathBuf>, - /// The file that the error occured within. - pub file: Option<String>, + /// The source snippet related to the error, if known. + pub snippet: Option<String>, - /// The problematic string (optional). - pub snippet: Option<String> + /// An optional underlying error that caused this one. + pub source: Option<Box<dyn Error>>, } +impl LexerError { + /// Creates a new `LexerError` with a message, span, and optional context. + /// + /// # Parameters + /// - `message`: A human-readable explanation of the error. + /// - `span`: The region in the source where the error occurred. + /// - `file`: An optional path to the file in which the error occurred. + /// - `snippet`: An optional problematic input string. + /// + /// # Returns + /// A new instance of `LexerError`.
+ pub fn new<S, T>( + message: S, + span: Span, + file: Option<T>, + snippet: Option<String>, + ) -> Self + where + S: Into<String>, + T: Into<PathBuf>, + { + LexerError { + message: message.into(), + span, + file: file.map(Into::into), + snippet: snippet.map(Into::into), + source: None, + } + } + /// Creates a `LexerError` from only a message and span. + /// + /// This is useful when file or snippet context is not available. + pub fn from_message<S>(message: S, span: Span) -> Self + where + S: Into<String>, + { + Self::new(message, span, None::<PathBuf>, None::<String>) + } -impl LexerError -{ - pub fn new<S, T>(message: S, span: Span, file: Option<T>, - snippet: Option<S>) - -> Self - where S: Into<String>, - T: Into<String> - { - LexerError { message: message.into(), - span, - file: file.map(|t| t.into()), - snippet: snippet.map(|s| s.into()) } - } + /// Attaches a snippet of the offending source code. + /// + /// This is helpful for diagnostics and tooling output. + pub fn with_snippet<S>(mut self, snippet: S) -> Self + where + S: Into<String>, + { + self.snippet = Some(snippet.into()); + self + } + + /// Attaches the path of the file in which the error occurred. + pub fn with_file<T>(mut self, file: T) -> Self + where + T: Into<PathBuf>, + { + self.file = Some(file.into()); + self + } + + /// Wraps a source error that caused this `LexerError`. + /// + /// This allows you to chain errors for more detailed diagnostics.
+ pub fn with_source<E>(mut self, err: E) -> Self + where + E: Error + 'static, + { + self.source = Some(Box::new(err)); + self + } } -impl std::fmt::Display for LexerError -{ - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result - { - match &self.snippet - { - Some(snippet) => - { - write!(f, - "LexerError at {}: {} (snippet: '{}')", - self.span, self.message, snippet) - } - None => - { - write!(f, "LexerError at {}: {}", self.span, self.message) - } - } - } +impl std::fmt::Display for LexerError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Lexer error at {}", self.span)?; + + if let Some(file) = &self.file { + write!(f, " in file `{}`", file.display())?; + } + + write!(f, ": {}", self.message)?; + + if let Some(snippet) = &self.snippet { + write!(f, "\n --> Snippet: `{}`", snippet)?; + } + + Ok(()) + } } -impl std::error::Error for LexerError {} +impl Error for LexerError { + /// Returns the underlying cause of this error, if any. + fn source(&self) -> Option<&(dyn Error + 'static)> { + self.source.as_ref().map(|e| e.as_ref()) + } +} diff --git a/src/lexer.rs b/src/lexer.rs index b3bdc8b..ab0a583 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -72,13 +72,12 @@ impl Lexer let mut cursor = Position::default(); let mut stream = TokenStream::new(); - let input_file = File::open(&path).map_err(|_error| { + let input_file = File::open(&path).map_err(|err| { LexerError::new( - "Unable to open file for Lexigraphical Analysis.", + "Unable to open file for Lexical Analysis.", Span::default(), - Some(path.as_ref().to_string_lossy().to_string()), - None, - ) + Some(path.as_ref().to_path_buf()), + None).with_source(err) })?; let reader = BufReader::with_capacity(BUFFER_SIZE, input_file); @@ -138,7 +137,8 @@ impl Lexer } /// Internal: scans a single line of text into tokens.
- fn scan(line: &str, stream: &mut TokenStream, cursor: &mut Position) + fn scan(line: &str, stream: &mut TokenStream, + cursor: &mut Position) { for c in line.chars() { diff --git a/src/position.rs b/src/position.rs index 30f32ae..2ae0a52 100644 --- a/src/position.rs +++ b/src/position.rs @@ -25,6 +25,14 @@ pub struct Span impl Position { + /// Creates a new `Position` with the given line and column. + /// + /// # Arguments + /// * `line` - The line number in the source (0-based). + /// * `column` - The column number within the line (0-based). + /// + /// # Returns + /// A `Position` representing the specified line and column. pub fn new(line: usize, column: usize) -> Self { Position { line, column } @@ -51,29 +59,66 @@ impl std::fmt::Display for Position impl Span { + /// Creates a new `Span` from a starting and ending `Position`. + /// + /// # Arguments + /// * `start` - The starting position of the span. + /// * `end` - The ending position of the span. + /// + /// # Returns + /// A `Span` covering the range from `start` to `end`. pub fn new(start: Position, end: Position) -> Self { Span { start, end } } + /// Creates a `Span` that covers a single position. + /// + /// Useful for zero-length spans or pinpointing a specific token or + /// character. + /// + /// # Arguments + /// * `val` - The position to be used as both start and end. + /// + /// # Returns + /// A `Span` that starts and ends at `val`. pub fn with_single(val: Position) -> Self { Span { start: val, end: val } } + /// Merges two spans into one, taking the start of the first + /// and the end of the second. + /// + /// # Arguments + /// * `a` - The first span. + /// * `b` - The second span. + /// + /// # Returns + /// A `Span` that starts at `a.start` and ends at `b.end`. + /// + /// # Note + /// Assumes that `a` comes before `b` in source order. 
pub fn merge(a: Span, b: Span) -> Self { Span { start: a.start, end: b.end } } - pub fn merge_with(&self, other: Span) -> Span { - Span { - start: self.start, - end: other.end, - } - } + /// Merges this span with another, producing a new span + /// from `self.start` to `other.end`. + /// + /// # Arguments + /// * `other` - Another span to merge with. + /// + /// # Returns + /// A new `Span` from the start of `self` to the end of `other`. + pub fn merge_with(&self, other: Span) -> Span + { + Span { start: self.start, + end: other.end } + } } impl Default for Span diff --git a/src/token.rs b/src/token.rs index 49bccae..0a28ff5 100644 --- a/src/token.rs +++ b/src/token.rs @@ -120,6 +120,10 @@ pub struct TokenStreamIterMut<'a, T> impl<T> TokenStream<T> { + /// Creates a new, empty `TokenStream`. + /// + /// # Returns + /// A `TokenStream` with no tokens. pub fn new() -> Self { TokenStream { lexemes: Vec::new(), @@ -127,16 +131,31 @@ impl<T> TokenStream<T> locations: Vec::new() } } + /// Returns the number of tokens in the stream. + /// + /// # Returns + /// The number of tokens currently stored. pub fn len(&self) -> usize { self.lexemes.len() } + /// Checks if the token stream is empty. + /// + /// # Returns + /// `true` if there are no tokens, `false` otherwise. pub fn is_empty(&self) -> bool { self.lexemes.is_empty() } + /// Retrieves an immutable reference to the token at the given index. + /// + /// # Arguments + /// * `index` - The position of the token in the stream. + /// + /// # Returns + /// `Some(Token)` if the index is valid, otherwise `None`. pub fn get(&self, index: usize) -> Option<Token<'_, T>> { if index < self.lexemes.len() @@ -151,12 +170,27 @@ impl<T> TokenStream<T> } } + /// Returns an iterator over immutable references to the tokens. + /// + /// # Returns + /// A `TokenStreamIter` iterator for the stream. pub fn iter(&self) -> TokenStreamIter<'_, T> { TokenStreamIter { stream: self, index: 0 } } + /// Retrieves a mutable reference to the token at the given index.
+ /// + /// # Arguments + /// * `index` - The position of the token in the stream. + /// + /// # Returns + /// `Some(TokenMut)` if the index is valid, otherwise `None`. + /// + /// # Safety + /// Uses `unsafe` to split borrows for individual mutable access + /// to components of the token without violating Rust’s aliasing rules. pub fn get_mut(&mut self, index: usize) -> Option<TokenMut<'_, T>> { if index < self.lexemes.len() @@ -167,7 +201,6 @@ impl<T> TokenStream<T> let variant = &mut self.variants[index] as *mut T; let span = &mut self.locations[index] as *mut Span; - // Convert &mut String to &mut str safely. unsafe { Some(TokenMut { lexeme: &mut *lexeme.as_mut() .unwrap() @@ -182,6 +215,9 @@ impl<T> TokenStream<T> } } + /// Clears all tokens from the stream. + /// + /// This resets the stream to an empty state. pub fn clear(&mut self) { self.lexemes.clear(); @@ -189,6 +225,12 @@ impl<T> TokenStream<T> self.locations.clear(); } + /// Pushes a new token onto the end of the stream. + /// + /// # Arguments + /// * `lexeme` - The text of the token. + /// * `variant` - The token type or value. + /// * `span` - The location of the token in the source. pub fn push(&mut self, lexeme: String, variant: T, span: Span) { self.lexemes.push(lexeme); @@ -196,6 +238,10 @@ impl<T> TokenStream<T> self.locations.push(span); } + /// Returns a mutable iterator over the tokens in the stream. + /// + /// # Returns + /// A `TokenStreamIterMut` for mutable access to each token component. pub fn iter_mut(&mut self) -> TokenStreamIterMut<'_, T> { TokenStreamIterMut { lexemes: self.lexemes.iter_mut(), @@ -204,7 +250,6 @@ } } - impl<'a, T> IntoIterator for &'a TokenStream<T> { type IntoIter = TokenStreamIter<'a, T>;