From 6984455c1241f81702ae0bd9f629aa032dbde15f Mon Sep 17 00:00:00 2001
From: Myrddin Dundragon
Date: Tue, 21 Oct 2025 11:43:11 -0400
Subject: [PATCH] Added the new LexerResult type for lexing functions to
 return.

The lexing functions now return a Result that wraps the TokenStream. For
now the error side is a generic boxed error, but later there should be a
dedicated lexical analysis error type.
---
 examples/basic.rs |   6 +--
 src/error.rs      |   6 +--
 src/lexer.rs      | 110 +++++++++++++++++++++++++++++++++++++++-------
 tests/lexer.rs    |  12 ++---
 4 files changed, 108 insertions(+), 26 deletions(-)

diff --git a/examples/basic.rs b/examples/basic.rs
index 8e5f138..589f3f3 100644
--- a/examples/basic.rs
+++ b/examples/basic.rs
@@ -46,7 +46,7 @@ impl std::fmt::Display for MarkdownTokenType
 
 // Define how you want to interpret base tokens
 pub fn transform(input: &TokenStream<TokenType>)
-    -> TokenStream<MarkdownTokenType>
+    -> Result<TokenStream<MarkdownTokenType>, Box<LexerError>>
 {
     let mut output = TokenStream::new();
 
@@ -157,11 +157,11 @@ pub fn transform(input: &TokenStream<TokenType>)
         i += 1;
     }
 
-    output
+    Ok(output)
 }
 
 
-fn main() -> Result<(), Box<dyn Error>>
+fn main() -> Result<(), Box<LexerError>>
 {
     let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
     path.push("examples/example.md");
diff --git a/src/error.rs b/src/error.rs
index 96b69cb..c22b647 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -29,7 +29,7 @@ pub struct LexerError
     pub snippet: Option<String>,
 
     /// An optional underlying error that caused this one.
-    pub source: Option<Box<dyn Error + 'static>>
+    pub source: Option<Box<dyn Error + Send + Sync + 'static>>
 }
 
 impl LexerError
@@ -88,7 +88,7 @@ impl LexerError
     ///
     /// This allows you to chain errors for more detailed diagnostics.
     pub fn with_source<E>(mut self, err: E) -> Self
-        where E: Error + 'static
+        where E: Error + Send + Sync + 'static
     {
         self.source = Some(Box::new(err));
         self
@@ -122,6 +122,6 @@ impl Error for LexerError
     /// Returns the underlying cause of this error, if any.
     fn source(&self) -> Option<&(dyn Error + 'static)>
     {
-        self.source.as_ref().map(|e| e.as_ref())
+        self.source.as_deref().map(|e| e as &(dyn Error + 'static))
     }
 }
diff --git a/src/lexer.rs b/src/lexer.rs
index 85a1360..163fc0d 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -12,6 +12,10 @@ use super::token::{TokenStream, TokenType};
 
 const BUFFER_SIZE: usize = 1024 * 1024;
 
 
+/// The Result returned from a lexing function.
+pub type LexerResult<T, E> = Result<TokenStream<T>, Box<E>>;
+
+
 /// The `Lexer` struct is responsible for performing lexical analysis
 /// (tokenization) on text.
@@ -30,14 +34,14 @@ const BUFFER_SIZE: usize = 1024 * 1024;
 /// # Example
 ///
 /// ```rust
-/// use rune::{Lexer, TokenStream, TokenType};
+/// use rune::{Lexer, LexerError, LexerResult, TokenStream, TokenType};
 ///
-/// fn transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
+/// fn transform(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType, LexerError>
 /// {
-///     tokens.clone()
+///     Ok(tokens.clone())
 /// }
 ///
-/// let tokens = Lexer::scan_text("Runes += 42", transform).unwrap();
+/// let tokens = Lexer::scan_text::<_, TokenType, LexerError>("Runes += 42", transform).unwrap();
 ///
 /// for token in &tokens
 /// {
@@ -85,19 +89,19 @@ impl Lexer
     /// # Errors
     /// Returns a `LexerError` if the file cannot be opened or read.
     pub fn scan_file<P, F, T>(path: P, transform: F)
-        -> Result<TokenStream<T>, LexerError>
+        -> LexerResult<T, LexerError>
         where P: AsRef<Path>,
-              F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
+              F: FnOnce(&TokenStream<TokenType>) -> LexerResult<T, LexerError>
     {
         let mut cursor = Position::default();
         let mut stream = TokenStream::new();
 
         let input_file = File::open(&path).map_err(|err| {
-            LexerError::new(
+            Box::new(LexerError::new(
                 "Unable to open file for Lexical Analysis.",
                 Span::default(),
                 Some(path.as_ref().to_path_buf()),
-                None).with_source(err)
+                None).with_source(err))
         })?;
 
         let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
@@ -113,13 +117,13 @@ impl Lexer
             }
             Err(_) =>
             {
-                return Err(LexerError::new("Unable to read line during \
-                                            Lexical Analysis.",
-                                           Span::default(),
-                                           Some(path.as_ref()
-                                                    .to_string_lossy()
-                                                    .to_string()),
-                                           None));
+                return Err(Box::new(LexerError::new("Unable to read line during \
+                                                     Lexical Analysis.",
+                                                    Span::default(),
+                                                    Some(path.as_ref()
+                                                             .to_string_lossy()
+                                                             .to_string()),
+                                                    None)));
             }
         }
 
@@ -131,7 +135,7 @@ impl Lexer
             cursor.column = 0;
         }
 
-        Ok(transform(&stream))
+        transform(&stream)
     }
 
     /// Scans a full in-memory string and produces transformed tokens.
@@ -150,9 +154,9 @@ impl Lexer
     /// # Returns
     /// A `Result<TokenStream<T>, LexerError>` where `T` is the transformed token
     /// type, or an error.
-    pub fn scan_text<F, T>(text: &str, transform: F)
-        -> Result<TokenStream<T>, LexerError>
-        where F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
+    pub fn scan_text<F, T, E>(text: &str, transform: F)
+        -> LexerResult<T, E>
+        where F: FnOnce(&TokenStream<TokenType>) -> LexerResult<T, E>
     {
         let mut cursor = Position::default();
         let mut stream = TokenStream::new();
@@ -177,7 +181,7 @@ impl Lexer
             stream.pop();
         }
 
-        Ok(transform(&stream))
+        transform(&stream)
     }
 
     /// Internal method that scans a single line of text into tokens.
@@ -246,3 +250,79 @@ fn get_token_type(curr_char: char) -> TokenType
         _ => TokenType::Unknown
     }
 }
+
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::token::{TokenType, TokenStream}; // Adjust import as needed
+
+    // Basic transform function that does nothing; it just clones the original stream.
+    fn passthrough_transform(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType, LexerError> {
+        Ok(tokens.clone())
+    }
+
+    #[test]
+    fn test_basic_text_scan() {
+        let input = "hello world";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Text, TokenType::Whitespace, TokenType::Text
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_numeric_and_symbol_scan() {
+        let input = "123 + 456";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Numeric,
+            TokenType::Whitespace,
+            TokenType::Symbol,
+            TokenType::Whitespace,
+            TokenType::Numeric
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_multiple_lines() {
+        let input = "abc\n123";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Text,
+            TokenType::Newline,
+            TokenType::Numeric
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_trailing_newline_handling() {
+        let input = "abc";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        // Should NOT end in a Newline since no trailing newline in input
+        assert_ne!(result.lexemes.last().unwrap(), "\n");
+    }
+
+    #[test]
+    fn test_empty_input() {
+        let input = "";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        assert!(result.lexemes.is_empty());
+    }
+}
diff --git a/tests/lexer.rs b/tests/lexer.rs
index c7b8480..cae3c60 100644
--- a/tests/lexer.rs
+++ b/tests/lexer.rs
@@ -15,7 +15,8 @@ struct TestCase<'a>
 
 
 
-fn dummy_transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
+fn dummy_transform(tokens: &TokenStream<TokenType>)
+    -> Result<TokenStream<TokenType>, Box<LexerError>>
 {
     /*
     let mut stream: TokenStream<(TokenType, String)> = TokenStream::default();
@@ -30,7 +31,7 @@ fn dummy_transform(tokens: &TokenStream<TokenType>)
 
     stream
     */
 
-    tokens.clone()
+    Ok(tokens.clone())
 }
 
 fn write_temp_file(name: &str, content: &str) -> PathBuf
@@ -56,8 +57,9 @@ fn cleanup_temp_file(path: &PathBuf)
 fn basic_lexing()
 {
     let tokens =
-        Lexer::scan_text("magic runes", dummy_transform).expect("Lexer should \
-                                                                 succeed");
+        Lexer::scan_text::<_, TokenType, LexerError>("magic runes",
+                                                     dummy_transform).expect("Lexer should \
+                                                                              succeed");
 
     let tokens = tokens.into_iter()
                        .map(|t| (*t.variant, String::from(t.lexeme)))
@@ -75,7 +77,7 @@ fn basic_lexing()
 fn symbols_and_numbers()
 {
     let tokens =
-        Lexer::scan_text("13 + 37", dummy_transform).expect("Lexer should \
-                                                             succeed");
+        Lexer::scan_text::<_, TokenType, LexerError>("13 + 37", dummy_transform).expect("Lexer should \
+                                                                                         succeed");
 
     let tokens = tokens.into_iter()
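
Usage sketch for review: with this patch, a caller's transform hands back the
token stream wrapped in the new LexerResult instead of a bare TokenStream, and
lexer errors arrive boxed. A minimal end-to-end example of the new calling
convention, assuming the `rune` crate name used in the doc comment above (the
`passthrough` function is illustrative, not part of the patch):

use rune::{Lexer, LexerError, LexerResult, TokenStream, TokenType};

// A transform that returns the scanned tokens unchanged, wrapped in Ok.
fn passthrough(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType, LexerError>
{
    Ok(tokens.clone())
}

fn main() -> Result<(), Box<LexerError>>
{
    // The turbofish pins the token and error types, as the updated tests do,
    // which keeps inference stable when the transform is a plain fn item.
    let tokens = Lexer::scan_text::<_, TokenType, LexerError>("Runes += 42", passthrough)?;

    for token in &tokens
    {
        println!("{}", token);
    }

    Ok(())
}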