Added the new LexerResult type alias, used to return errors from the Lexer.

The lexing functions now return a LexerResult, a Result that wraps the TokenStream on success. For now its error side is any boxed Error (`Box<dyn Error + Send + Sync>`), but later it should be a dedicated lexical analysis error type.
2025-10-21 11:43:11 -04:00
parent c950b70c9b
commit 6984455c12
4 changed files with 108 additions and 26 deletions
--- a/src/error.rs
+++ b/src/error.rs
@ -29,7 +29,7 @@ pub struct LexerError
   pub snippet: Option<String>,

   /// An optional underlying error that caused this one.
-   pub source: Option<Box<dyn Error>>
+   pub source: Option<Box<dyn Error + Send + Sync>>
 }

 impl LexerError
@ -88,7 +88,7 @@ impl LexerError
   ///
   /// This allows you to chain errors for more detailed diagnostics.
   pub fn with_source<E>(mut self, err: E) -> Self
-      where E: Error + 'static
+      where E: Error + Send + Sync + 'static
   {
      self.source = Some(Box::new(err));
      self
@ -122,6 +122,6 @@ impl Error for LexerError
   /// Returns the underlying cause of this error, if any.
   fn source(&self) -> Option<&(dyn Error + 'static)>
   {
-      self.source.as_ref().map(|e| e.as_ref())
+      self.source.as_deref().map(|e| e as &(dyn Error + 'static))
   }
 }
--- a/src/lexer.rs
+++ b/src/lexer.rs
@ -12,6 +12,10 @@ use super::token::{TokenStream, TokenType};
 const BUFFER_SIZE: usize = 1024 * 1024;


+/// The Result returned from a lexing function.
+pub type LexerResult<T> = Result<TokenStream<T>, Box<dyn std::error::Error + Send + Sync>>;
+
+

 /// The `Lexer` struct is responsible for performing lexical analysis
 /// (tokenization) on text.
@ -30,14 +34,14 @@ const BUFFER_SIZE: usize = 1024 * 1024;
 /// # Example
 ///
 /// ```rust
-/// use rune::{Lexer, TokenStream, TokenType};
+/// use rune::{Lexer, LexerError,  LexerResult, TokenStream, TokenType};
 ///
-/// fn transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
+/// fn transform(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType>
 /// {
-///    tokens.clone()
+///    Ok(tokens.clone())
 /// }
 ///
-/// let tokens = Lexer::scan_text("Runes += 42", transform).unwrap();
+/// let tokens = Lexer::scan_text::<_, TokenType, LexerError>("Runes += 42", transform).unwrap();
 ///
 /// for token in &tokens
 /// {
@ -85,19 +89,19 @@ impl Lexer
   /// # Errors
   /// Returns a `LexerError` if the file cannot be opened or read.
   pub fn scan_file<P, F, T>(path: P, transform: F)
-                             -> Result<TokenStream<T>, LexerError>
+                             -> LexerResult<T>
      where P: AsRef<std::path::Path>,
-            F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
+            F: FnOnce(&TokenStream<TokenType>) -> LexerResult<T>
   {
      let mut cursor = Position::default();
      let mut stream = TokenStream::new();

      let input_file = File::open(&path).map_err(|err| {
-                                           LexerError::new(
+                                           Box::new(LexerError::new(
                "Unable to open file for Lexical Analysis.",
                Span::default(),
                Some(path.as_ref().to_path_buf()),
-                None).with_source(err)
+                None).with_source(err))
                                        })?;

      let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
@ -113,13 +117,13 @@ impl Lexer
            }
            Err(_) =>
            {
-               return Err(LexerError::new("Unable to read line during \
+               return Err(Box::new(LexerError::new("Unable to read line during \
                                           Lexical Analysis.",
                                          Span::default(),
                                          Some(path.as_ref()
                                                   .to_string_lossy()
                                                   .to_string()),
-                                          None));
+                                          None)));
            }
         }

@ -131,7 +135,7 @@ impl Lexer
         cursor.column = 0;
      }

-      Ok(transform(&stream))
+      transform(&stream)
   }

   /// Scans a full in-memory string and produces transformed tokens.
@ -150,9 +154,9 @@ impl Lexer
   /// # Returns
   /// A `Result<TokenStream<T>, LexerError>` where `T` is the transformed token
   /// type, or an error.
-   pub fn scan_text<F, T>(text: &str, transform: F)
-                          -> Result<TokenStream<T>, LexerError>
-      where F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
+   pub fn scan_text<F, T, E>(text: &str, transform: F)
+                          -> LexerResult<T>
+      where F: FnOnce(&TokenStream<TokenType>) -> LexerResult<T>
   {
      let mut cursor = Position::default();
      let mut stream = TokenStream::new();
@ -177,7 +181,7 @@ impl Lexer
         stream.pop();
      }

-      Ok(transform(&stream))
+      transform(&stream)
   }

   /// Internal method that scans a single line of text into tokens.
@ -246,3 +250,79 @@ fn get_token_type(curr_char: char) -> TokenType
      _ => TokenType::Unknown
   }
 }
+
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::token::{TokenType, TokenStream}; // Adjust import as needed
+
+    // Basic transform function that does nothing — just clones the original stream.
+    fn passthrough_transform(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType> {
+        Ok(tokens.clone())
+    }
+
+    #[test]
+    fn test_basic_text_scan() {
+        let input = "hello world";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Text, TokenType::Whitespace, TokenType::Text
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_numeric_and_symbol_scan() {
+        let input = "123 + 456";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Numeric,
+            TokenType::Whitespace,
+            TokenType::Symbol,
+            TokenType::Whitespace,
+            TokenType::Numeric
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_multiple_lines() {
+        let input = "abc\n123";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Text,
+            TokenType::Newline,
+            TokenType::Numeric
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_trailing_newline_handling() {
+        let input = "abc";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        // Should NOT end in a Newline since no trailing newline in input
+        assert_ne!(result.lexemes.last().unwrap(), "\n");
+    }
+
+    #[test]
+    fn test_empty_input() {
+        let input = "";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        assert!(result.lexemes.is_empty());
+    }
+}
+