Enhanced the LexerError and Documentation of Types

Enhanced `LexerError` so that it now wraps an underlying Rust source `Error`.
This lets us wrap up the possible I/O errors we get when trying
to open a file and read its contents.

Also added documentation for all of the implemented functions.
This commit is contained in:
Myrddin Dundragon 2025-04-16 20:03:15 -04:00
parent 31290cc86f
commit a926e08061
4 changed files with 216 additions and 58 deletions

View File

@ -1,59 +1,127 @@
use std::{error::Error, path::PathBuf};
use super::position::Span;
/// An error that has occured during lexigraphical analysis.
#[derive(Debug, Clone, PartialEq)]
pub struct LexerError
{
/// Represents an error encountered during lexical analysis.
///
/// `LexerError` contains contextual information about where the error
/// occurred in the source input, an optional offending snippet,
/// the file path (if applicable), and an optional underlying error
/// that triggered the failure.
///
/// It is designed to provide detailed diagnostics for file-based or
/// in-memory parsing and is compatible with error reporting ecosystems.
#[derive(Debug)]
pub struct LexerError {
/// A human-readable error message.
pub message: String,
/// The start and end of where the error is located in the file.
/// The span where the error occurred.
pub span: Span,
/// The file that the error occured within.
pub file: Option<std::path::PathBuf>,
/// The file that the error occurred in, if known.
pub file: Option<PathBuf>,
/// The problematic string (optional).
pub snippet: Option<String>
/// The source snippet related to the error, if known.
pub snippet: Option<String>,
/// An optional underlying error that caused this one.
pub source: Option<Box<dyn Error>>,
}
impl LexerError
impl LexerError {
/// Creates a new `LexerError` with a message, span, and optional context.
///
/// # Parameters
/// - `message`: A human-readable explanation of the error.
/// - `span`: The region in the source where the error occurred.
/// - `file`: An optional path to the file in which the error occurred.
/// - `snippet`: An optional problematic input string.
///
/// # Returns
/// A new instance of `LexerError`.
pub fn new<S, T>(
message: S,
span: Span,
file: Option<T>,
snippet: Option<S>,
) -> Self
where
S: Into<String>,
T: Into<PathBuf>,
{
pub fn new<S, T>(message: S, span: Span, file: Option<T>,
snippet: Option<S>)
-> Self
where S: Into<String>,
T: Into<std::path::PathBuf>
{
LexerError { message: message.into(),
LexerError {
message: message.into(),
span,
file: file.map(|t| t.into()),
snippet: snippet.map(|s| s.into()) }
file: file.map(Into::into),
snippet: snippet.map(Into::into),
source: None,
}
}
impl std::fmt::Display for LexerError
/// Creates a `LexerError` from only a message and span.
///
/// This is useful when file or snippet context is not available.
pub fn from_message<S>(message: S, span: Span) -> Self
where
S: Into<String>,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result
{
match &self.snippet
{
Some(snippet) =>
{
write!(f,
"LexerError at {}: {} (snippet: '{}')",
self.span, self.message, snippet)
Self::new(message, span, None::<PathBuf>, None::<S>)
}
None =>
/// Attaches a snippet of the offending source code.
///
/// This is helpful for diagnostics and tooling output.
pub fn with_snippet<S>(mut self, snippet: S) -> Self
where
S: Into<String>,
{
write!(f, "LexerError at {}: {}", self.span, self.message)
self.snippet = Some(snippet.into());
self
}
/// Attaches the path of the file in which the error occurred.
pub fn with_file<T>(mut self, file: T) -> Self
where
T: Into<PathBuf>,
{
self.file = Some(file.into());
self
}
/// Wraps a source error that caused this `LexerError`.
///
/// This allows you to chain errors for more detailed diagnostics.
pub fn with_source<E>(mut self, err: E) -> Self
where
E: Error + 'static,
{
self.source = Some(Box::new(err));
self
}
}
impl std::error::Error for LexerError {}
impl std::fmt::Display for LexerError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Lexer error at {}", self.span)?;
if let Some(file) = &self.file {
write!(f, " in file `{}`", file.display())?;
}
write!(f, ": {}", self.message)?;
if let Some(snippet) = &self.snippet {
write!(f, "\n --> Snippet: `{}`", snippet)?;
}
Ok(())
}
}
impl Error for LexerError {
/// Returns the underlying cause of this error, if any.
fn source(&self) -> Option<&(dyn Error + 'static)> {
self.source.as_ref().map(|e| e.as_ref())
}
}

View File

@ -72,13 +72,12 @@ impl Lexer
let mut cursor = Position::default();
let mut stream = TokenStream::new();
let input_file = File::open(&path).map_err(|_error| {
let input_file = File::open(&path).map_err(|err| {
LexerError::new(
"Unable to open file for Lexigraphical Analysis.",
"Unable to open file for Lexical Analysis.",
Span::default(),
Some(path.as_ref().to_string_lossy().to_string()),
None,
)
Some(path.as_ref().to_path_buf()),
None).with_source(err)
})?;
let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
@ -138,7 +137,8 @@ impl Lexer
}
/// Internal: scans a single line of text into tokens.
fn scan(line: &str, stream: &mut TokenStream<TokenType>, cursor: &mut Position)
fn scan(line: &str, stream: &mut TokenStream<TokenType>,
cursor: &mut Position)
{
for c in line.chars()
{

View File

@ -25,6 +25,14 @@ pub struct Span
impl Position
{
/// Creates a new `Position` with the given line and column.
///
/// # Arguments
/// * `line` - The line number in the source (0-based).
/// * `column` - The column number within the line (0-based).
///
/// # Returns
/// A `Position` representing the specified line and column.
pub fn new(line: usize, column: usize) -> Self
{
Position { line, column }
@ -51,28 +59,65 @@ impl std::fmt::Display for Position
impl Span
{
/// Creates a new `Span` from a starting and ending `Position`.
///
/// # Arguments
/// * `start` - The starting position of the span.
/// * `end` - The ending position of the span.
///
/// # Returns
/// A `Span` covering the range from `start` to `end`.
pub fn new(start: Position, end: Position) -> Self
{
Span { start, end }
}
/// Creates a `Span` that covers a single position.
///
/// Useful for zero-length spans or pinpointing a specific token or
/// character.
///
/// # Arguments
/// * `val` - The position to be used as both start and end.
///
/// # Returns
/// A `Span` that starts and ends at `val`.
pub fn with_single(val: Position) -> Self
{
Span { start: val,
end: val }
}
/// Merges two spans into one, taking the start of the first
/// and the end of the second.
///
/// # Arguments
/// * `a` - The first span.
/// * `b` - The second span.
///
/// # Returns
/// A `Span` that starts at `a.start` and ends at `b.end`.
///
/// # Note
/// Assumes that `a` comes before `b` in source order.
pub fn merge(a: Span, b: Span) -> Self
{
Span { start: a.start,
end: b.end }
}
pub fn merge_with(&self, other: Span) -> Span {
Span {
start: self.start,
end: other.end,
}
/// Merges this span with another, producing a new span
/// from `self.start` to `other.end`.
///
/// # Arguments
/// * `other` - Another span to merge with.
///
/// # Returns
/// A new `Span` from the start of `self` to the end of `other`.
pub fn merge_with(&self, other: Span) -> Span
{
Span { start: self.start,
end: other.end }
}
}

View File

@ -120,6 +120,10 @@ pub struct TokenStreamIterMut<'a, T>
impl<T> TokenStream<T>
{
/// Creates a new, empty `TokenStream`.
///
/// # Returns
/// A `TokenStream` with no tokens.
pub fn new() -> Self
{
TokenStream { lexemes: Vec::new(),
@ -127,16 +131,31 @@ impl<T> TokenStream<T>
locations: Vec::new() }
}
/// Returns the number of tokens in the stream.
///
/// # Returns
/// The number of tokens currently stored.
pub fn len(&self) -> usize
{
self.lexemes.len()
}
/// Checks if the token stream is empty.
///
/// # Returns
/// `true` if there are no tokens, `false` otherwise.
pub fn is_empty(&self) -> bool
{
self.lexemes.is_empty()
}
/// Retrieves an immutable reference to the token at the given index.
///
/// # Arguments
/// * `index` - The position of the token in the stream.
///
/// # Returns
/// `Some(Token)` if the index is valid, otherwise `None`.
pub fn get(&self, index: usize) -> Option<Token<'_, T>>
{
if index < self.lexemes.len()
@ -151,12 +170,27 @@ impl<T> TokenStream<T>
}
}
/// Returns an iterator over immutable references to the tokens.
///
/// # Returns
/// A `TokenStreamIter` iterator for the stream.
pub fn iter(&self) -> TokenStreamIter<'_, T>
{
TokenStreamIter { stream: self,
index: 0 }
}
/// Retrieves a mutable reference to the token at the given index.
///
/// # Arguments
/// * `index` - The position of the token in the stream.
///
/// # Returns
/// `Some(TokenMut)` if the index is valid, otherwise `None`.
///
/// # Safety
/// Uses `unsafe` to split borrows for individual mutable access
/// to components of the token without violating Rusts aliasing rules.
pub fn get_mut(&mut self, index: usize) -> Option<TokenMut<'_, T>>
{
if index < self.lexemes.len()
@ -167,7 +201,6 @@ impl<T> TokenStream<T>
let variant = &mut self.variants[index] as *mut T;
let span = &mut self.locations[index] as *mut Span;
// Convert &mut String to &mut str safely.
unsafe {
Some(TokenMut { lexeme: &mut *lexeme.as_mut()
.unwrap()
@ -182,6 +215,9 @@ impl<T> TokenStream<T>
}
}
/// Clears all tokens from the stream.
///
/// This resets the stream to an empty state.
pub fn clear(&mut self)
{
self.lexemes.clear();
@ -189,6 +225,12 @@ impl<T> TokenStream<T>
self.locations.clear();
}
/// Pushes a new token onto the end of the stream.
///
/// # Arguments
/// * `lexeme` - The text of the token.
/// * `variant` - The token type or value.
/// * `span` - The location of the token in the source.
pub fn push(&mut self, lexeme: String, variant: T, span: Span)
{
self.lexemes.push(lexeme);
@ -196,6 +238,10 @@ impl<T> TokenStream<T>
self.locations.push(span);
}
/// Returns a mutable iterator over the tokens in the stream.
///
/// # Returns
/// A `TokenStreamIterMut` for mutable access to each token component.
pub fn iter_mut(&mut self) -> TokenStreamIterMut<'_, T>
{
TokenStreamIterMut { lexemes: self.lexemes.iter_mut(),
@ -204,7 +250,6 @@ impl<T> TokenStream<T>
}
}
impl<'a, T> IntoIterator for &'a TokenStream<T>
{
type IntoIter = TokenStreamIter<'a, T>;