Just some basic updating and cleaning up.

- Added comments.
- Ran cargo fmt.
- Updated the versioning.
2025-04-22 02:18:12 -04:00
parent cd50b53be5
commit f5780f50c2
7 changed files with 330 additions and 237 deletions
--- a/src/error.rs
+++ b/src/error.rs
@ -1,4 +1,6 @@
-use std::{error::Error, path::PathBuf};
+use std::error::Error;
+use std::path::PathBuf;
+
 use super::position::Span;


@ -12,116 +14,114 @@ use super::position::Span;
 /// It is designed to provide detailed diagnostics for file-based or
 /// in-memory parsing and is compatible with error reporting ecosystems.
 #[derive(Debug)]
-pub struct LexerError {
-    /// A human-readable error message.
-    pub message: String,
+pub struct LexerError
+{
+   /// A human-readable error message.
+   pub message: String,

-    /// The span where the error occurred.
-    pub span: Span,
+   /// The span where the error occurred.
+   pub span: Span,

-    /// The file that the error occurred in, if known.
-    pub file: Option<PathBuf>,
+   /// The file that the error occurred in, if known.
+   pub file: Option<PathBuf>,

-    /// The source snippet related to the error, if known.
-    pub snippet: Option<String>,
+   /// The source snippet related to the error, if known.
+   pub snippet: Option<String>,

-    /// An optional underlying error that caused this one.
-    pub source: Option<Box<dyn Error>>,
+   /// An optional underlying error that caused this one.
+   pub source: Option<Box<dyn Error>>
 }

-impl LexerError {
-    /// Creates a new `LexerError` with a message, span, and optional context.
-    ///
-    /// # Parameters
-    /// - `message`: A human-readable explanation of the error.
-    /// - `span`: The region in the source where the error occurred.
-    /// - `file`: An optional path to the file in which the error occurred.
-    /// - `snippet`: An optional problematic input string.
-    ///
-    /// # Returns
-    /// A new instance of `LexerError`.
-    pub fn new<S, T>(
-        message: S,
-        span: Span,
-        file: Option<T>,
-        snippet: Option<S>,
-    ) -> Self
-    where
-        S: Into<String>,
-        T: Into<PathBuf>,
-    {
-        LexerError {
-            message: message.into(),
-            span,
-            file: file.map(Into::into),
-            snippet: snippet.map(Into::into),
-            source: None,
-        }
-    }
+impl LexerError
+{
+   /// Creates a new `LexerError` with a message, span, and optional context.
+   ///
+   /// # Parameters
+   /// - `message`: A human-readable explanation of the error.
+   /// - `span`: The region in the source where the error occurred.
+   /// - `file`: An optional path to the file in which the error occurred.
+   /// - `snippet`: An optional problematic input string.
+   ///
+   /// # Returns
+   /// A new instance of `LexerError`.
+   pub fn new<S, T>(message: S, span: Span, file: Option<T>,
+                    snippet: Option<S>)
+                    -> Self
+      where S: Into<String>,
+            T: Into<PathBuf>
+   {
+      LexerError { message: message.into(),
+                   span,
+                   file: file.map(Into::into),
+                   snippet: snippet.map(Into::into),
+                   source: None }
+   }

-    /// Creates a `LexerError` from only a message and span.
-    ///
-    /// This is useful when file or snippet context is not available.
-    pub fn from_message<S>(message: S, span: Span) -> Self
-    where
-        S: Into<String>,
-    {
-        Self::new(message, span, None::<PathBuf>, None::<S>)
-    }
+   /// Creates a `LexerError` from only a message and span.
+   ///
+   /// This is useful when file or snippet context is not available.
+   pub fn from_message<S>(message: S, span: Span) -> Self
+      where S: Into<String>
+   {
+      Self::new(message, span, None::<PathBuf>, None::<S>)
+   }

-    /// Attaches a snippet of the offending source code.
-    ///
-    /// This is helpful for diagnostics and tooling output.
-    pub fn with_snippet<S>(mut self, snippet: S) -> Self
-    where
-        S: Into<String>,
-    {
-        self.snippet = Some(snippet.into());
-        self
-    }
+   /// Attaches a snippet of the offending source code.
+   ///
+   /// This is helpful for diagnostics and tooling output.
+   pub fn with_snippet<S>(mut self, snippet: S) -> Self
+      where S: Into<String>
+   {
+      self.snippet = Some(snippet.into());
+      self
+   }

-    /// Attaches the path of the file in which the error occurred.
-    pub fn with_file<T>(mut self, file: T) -> Self
-    where
-        T: Into<PathBuf>,
-    {
-        self.file = Some(file.into());
-        self
-    }
+   /// Attaches the path of the file in which the error occurred.
+   pub fn with_file<T>(mut self, file: T) -> Self
+      where T: Into<PathBuf>
+   {
+      self.file = Some(file.into());
+      self
+   }

-    /// Wraps a source error that caused this `LexerError`.
-    ///
-    /// This allows you to chain errors for more detailed diagnostics.
-    pub fn with_source<E>(mut self, err: E) -> Self
-    where
-        E: Error + 'static,
-    {
-        self.source = Some(Box::new(err));
-        self
-    }
+   /// Wraps a source error that caused this `LexerError`.
+   ///
+   /// This allows you to chain errors for more detailed diagnostics.
+   pub fn with_source<E>(mut self, err: E) -> Self
+      where E: Error + 'static
+   {
+      self.source = Some(Box::new(err));
+      self
+   }
 }

-impl std::fmt::Display for LexerError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Lexer error at {}", self.span)?;
+impl std::fmt::Display for LexerError
+{
+   fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
+   {
+      write!(f, "Lexer error at {}", self.span)?;

-        if let Some(file) = &self.file {
-            write!(f, " in file `{}`", file.display())?;
-        }
+      if let Some(file) = &self.file
+      {
+         write!(f, " in file `{}`", file.display())?;
+      }

-        write!(f, ": {}", self.message)?;
+      write!(f, ": {}", self.message)?;

-        if let Some(snippet) = &self.snippet {
-            write!(f, "\n  --> Snippet: `{}`", snippet)?;
-        }
+      if let Some(snippet) = &self.snippet
+      {
+         write!(f, "\n  --> Snippet: `{}`", snippet)?;
+      }

-        Ok(())
-    }
+      Ok(())
+   }
 }

-impl Error for LexerError {
-    /// Returns the underlying cause of this error, if any.
-    fn source(&self) -> Option<&(dyn Error + 'static)> {
-        self.source.as_ref().map(|e| e.as_ref())
-    }
+impl Error for LexerError
+{
+   /// Returns the underlying cause of this error, if any.
+   fn source(&self) -> Option<&(dyn Error + 'static)>
+   {
+      self.source.as_ref().map(|e| e.as_ref())
+   }
 }
--- a/src/lexer.rs
+++ b/src/lexer.rs
@ -7,24 +7,25 @@ use super::token::{TokenStream, TokenType};



-/// The size of data chunks to read from a file. This was arbitrarily chosen to
-/// be 1mb.
+/// The capacity, in bytes, of the buffer used when reading a file. The value
+/// is an arbitrary choice: 1 MiB (1024 * 1024 bytes).
 const BUFFER_SIZE: usize = 1024 * 1024;



-/// The `Lexer` is the core component responsible for performing
-/// lexicographical analysis on a text file.
+/// The `Lexer` struct is responsible for performing lexical analysis
+/// (tokenization) on text.
 ///
-/// It reads input from a file character-by-character, generating a stream
-/// of base tokens such as text, numbers, whitespace, symbols, and newlines.
-/// These tokens are accumulated into a `TokenStream`, which is a flat,
-/// cache-friendly data structure.
+/// It processes input from a file or string character-by-character and
+/// generates a stream of tokens, such as text, numbers, whitespace, symbols,
+/// and newlines. These tokens are accumulated into a `TokenStream`, which is a
+/// flat, cache-friendly data structure designed for efficient iteration.
 ///
-/// After tokenization, the lexer applies a user-provided `transform` function
-/// to each token in the stream, allowing consumers of the library to convert
-/// base tokens into richer, domain-specific token types (e.g. Markdown
-/// elements, syntax trees, or custom DSL tokens).
+/// After the base tokens are generated, the `Lexer` allows for transformation
+/// of these tokens into richer, domain-specific types via a user-provided
+/// `transform` function. This transformation can be used to convert base tokens
+/// into specific elements of a Markdown syntax tree, custom DSL tokens, or any
+/// other custom format you need.
 ///
 /// # Example
 ///
@ -38,32 +39,51 @@ const BUFFER_SIZE: usize = 1024 * 1024;
 ///
 /// let tokens = Lexer::scan_text("Runes += 42", transform).unwrap();
 ///
-/// // The tuple here is from the transform functions return type.
 /// for token in &tokens
 /// {
 ///    println!("{}", token);
 /// }
 /// ```
 ///
-/// # Design Notes
-///
-/// - Uses a flat `TokenStream` to improve iteration performance and reduce heap
+/// # Design Considerations
+/// - Utilizes a flat `TokenStream` to improve performance and reduce heap
 ///   overhead.
-/// - Consolidates contiguous characters into compound tokens (e.g. multi-digit
+/// - Consolidates contiguous characters into compound tokens (e.g., multi-digit
 ///   numbers).
-/// - Easily extensible via the `transform` function to support higher-level
-///   parsing tasks.
+/// - Extensible via the `transform` function, enabling the creation of
+///   higher-level constructs, like Markdown elements or syntax trees for a
+///   custom DSL.
 ///
-/// # Errors
-///
-/// Returns a `LexerError` if the file cannot be opened or read.
+/// # Error Handling
+/// The lexer returns a `LexerError` if the input file cannot be opened or
+/// read. Typical causes are a missing file, an I/O failure while reading, or
+/// a line that is not valid UTF-8.
 pub enum Lexer {}



 impl Lexer
 {
-   /// Scans a file and produces a vector of transformed tokens.
+   /// Scans a file and generates a vector of transformed tokens based on the
+   /// provided `transform` function.
+   ///
+   /// This method opens a file from the given `path`, reads the file line by
+   /// line, and converts the input into a stream of tokens. The tokens are
+   /// then passed to the `transform` function, which allows users to map
+   /// base tokens into domain-specific types.
+   ///
+   /// # Parameters
+   /// - `path`: A path to the file to be lexically analyzed.
+   /// - `transform`: A function that takes a reference to the base
+   ///   `TokenStream<TokenType>` and returns a `TokenStream<T>`, where `T` is
+   ///   a domain-specific type.
+   ///
+   /// # Returns
+   /// A `Result<TokenStream<T>, LexerError>` where `T` is the transformed token
+   /// type, or an error.
+   ///
+   /// # Errors
+   /// Returns a `LexerError` if the file cannot be opened or read.
   pub fn scan_file<P, F, T>(path: P, transform: F)
                             -> Result<TokenStream<T>, LexerError>
      where P: AsRef<std::path::Path>,
@ -82,6 +102,7 @@ impl Lexer

      let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);

+      // Read the file line by line.
      for line in reader.lines()
      {
         match line
@ -93,7 +114,7 @@ impl Lexer
            Err(_) =>
            {
               return Err(LexerError::new("Unable to read line during \
-                                           Lexigraphical Analysis.",
+                                           Lexical Analysis.",
                                          Span::default(),
                                          Some(path.as_ref()
                                                   .to_string_lossy()
@ -102,10 +123,10 @@ impl Lexer
            }
         }

+         // Add the newline token after each line.
         stream.push("\n".to_string(),
                     TokenType::Newline,
                     Span::with_single(cursor));
-
         cursor.line += 1;
         cursor.column = 0;
      }
@ -113,7 +134,22 @@ impl Lexer
      Ok(transform(&stream))
   }

-   /// Scans a full in-memory string and returns transformed tokens.
+   /// Scans a full in-memory string and produces transformed tokens.
+   ///
+   /// This method tokenizes the input string `text` and returns the transformed
+   /// tokens using the provided `transform` function. It's a convenient way
+   /// to perform lexical analysis on in-memory strings without needing to
+   /// read from a file.
+   ///
+   /// # Parameters
+   /// - `text`: A string slice representing the in-memory input text to
+   ///   analyze.
+   /// - `transform`: A function that transforms the base tokens into
+   ///   domain-specific types.
+   ///
+   /// # Returns
+   /// `Ok(TokenStream<T>)`, where `T` is the transformed token type. Scanning
+   /// in-memory text performs no I/O, so this never returns `Err` at present.
   pub fn scan_text<F, T>(text: &str, transform: F)
                          -> Result<TokenStream<T>, LexerError>
      where F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
@ -121,14 +157,15 @@ impl Lexer
      let mut cursor = Position::default();
      let mut stream = TokenStream::new();

+      // Process each line in the input string.
      for line in text.lines()
      {
         Self::scan(line, &mut stream, &mut cursor);

+         // Add the newline token after each line.
         stream.push("\n".to_string(),
                     TokenType::Newline,
                     Span::with_single(cursor));
-
         cursor.line += 1;
         cursor.column = 0;
      }
@ -136,15 +173,28 @@ impl Lexer
      Ok(transform(&stream))
   }

-   /// Internal: scans a single line of text into tokens.
+   /// Internal method that scans a single line of text into tokens.
+   ///
+   /// This method processes each character of a line and generates the
+   /// corresponding token. It handles cases like numeric tokens, text
+   /// tokens, symbols, and whitespace.
+   ///
+   /// # Parameters
+   /// - `line`: A line of text to be lexically analyzed.
+   /// - `stream`: A mutable reference to the token stream where the generated
+   ///   tokens will be pushed.
+   /// - `cursor`: A mutable reference to the cursor position, which tracks the
+   ///   current position in the input.
   fn scan(line: &str, stream: &mut TokenStream<TokenType>,
           cursor: &mut Position)
   {
      for c in line.chars()
      {
+         // Get the token type based on the character.
         let variant = get_token_type(c);
         let last = stream.len().saturating_sub(1);

+         // Handle token merging for contiguous tokens like numbers or text.
         if !stream.is_empty() &&
            variant == stream.variants[last] &&
            (variant == TokenType::Numeric || variant == TokenType::Text)
@ -154,6 +204,7 @@ impl Lexer
         }
         else
         {
+            // Add a new token to the stream.
            stream.push(c.to_string(), variant, Span::with_single(*cursor));
         }

@ -164,6 +215,18 @@ impl Lexer



+/// Determines the type of a token based on the current character.
+///
+/// This helper function is responsible for identifying whether the current
+/// character is part of a known token type such as numeric, text, whitespace,
+/// or symbol.
+///
+/// # Parameters
+/// - `curr_char`: The current character to analyze.
+///
+/// # Returns
+/// A `TokenType` corresponding to the character type (e.g., `Numeric`, `Text`,
+/// `Whitespace`, etc.).
 fn get_token_type(curr_char: char) -> TokenType
 {
   match curr_char