Added the new LexerResult type alias for lexer error handling.

The code now returns a Result containing the TokenStream. For now
it returns a boxed generic error, but later there should be a dedicated
lexical analysis error type.
commit 6984455c12
parent c950b70c9b
2025-10-21 11:43:11 -04:00
4 changed files with 108 additions and 26 deletions
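
In caller terms, the change looks like this. A minimal sketch of the new
calling convention, assembled from the updated doc example and tests in
this diff (rune, LexerError, LexerResult, and the scan_text turbofish are
all taken from the changed files below):

    use rune::{Lexer, LexerError, LexerResult, TokenStream, TokenType};

    // Transforms now return the stream wrapped in a Result.
    fn passthrough(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType>
    {
        Ok(tokens.clone())
    }

    fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
    {
        // The turbofish names the error type explicitly until a dedicated
        // lexical analysis error type replaces the boxed generic error.
        let tokens = Lexer::scan_text::<_, TokenType, LexerError>("Runes += 42",
                                                                  passthrough)?;

        for token in &tokens
        {
            // Each token carries a variant (TokenType) and a lexeme.
            let _ = token;
        }

        Ok(())
    }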


@@ -46,7 +46,7 @@ impl std::fmt::Display for MarkdownTokenType
 // Define how you want to interpret base tokens
 pub fn transform(input: &TokenStream<TokenType>)
-    -> TokenStream<MarkdownTokenType>
+    -> Result<TokenStream<MarkdownTokenType>, Box<dyn std::error::Error + Send + Sync>>
 {
     let mut output = TokenStream::new();
@@ -157,11 +157,11 @@ pub fn transform(input: &TokenStream<TokenType>)
         i += 1;
     }
 
-    output
+    Ok(output)
 }
 
-fn main() -> Result<(), Box<dyn std::error::Error>>
+fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
 {
     let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
     path.push("examples/example.md");


@@ -29,7 +29,7 @@ pub struct LexerError
     pub snippet: Option<String>,
 
     /// An optional underlying error that caused this one.
-    pub source: Option<Box<dyn Error>>
+    pub source: Option<Box<dyn Error + Send + Sync>>
 }
 
 impl LexerError
@@ -88,7 +88,7 @@ impl LexerError
     ///
     /// This allows you to chain errors for more detailed diagnostics.
     pub fn with_source<E>(mut self, err: E) -> Self
-        where E: Error + 'static
+        where E: Error + Send + Sync + 'static
     {
         self.source = Some(Box::new(err));
         self
@@ -122,6 +122,6 @@ impl Error for LexerError
     /// Returns the underlying cause of this error, if any.
     fn source(&self) -> Option<&(dyn Error + 'static)>
     {
-        self.source.as_ref().map(|e| e.as_ref())
+        self.source.as_deref().map(|e| e as &(dyn Error + 'static))
     }
 }
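
The recurring switch from Box<dyn Error> to Box<dyn Error + Send + Sync>
in this file is what lets lexer errors cross thread boundaries. A small
standalone illustration of what the added bounds buy (std-only sketch,
not code from this repo):

    use std::error::Error;
    use std::io;

    fn main()
    {
        // Without the Send + Sync bounds this value could not be moved
        // into another thread.
        let err: Box<dyn Error + Send + Sync> =
            Box::new(io::Error::new(io::ErrorKind::Other, "lexing failed"));

        let handle = std::thread::spawn(move || {
            println!("{err}");
        });

        handle.join().unwrap();
    }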


@@ -12,6 +12,10 @@ use super::token::{TokenStream, TokenType};
 const BUFFER_SIZE: usize = 1024 * 1024;
 
+/// The Result returned from a lexing function.
+pub type LexerResult<T> = Result<TokenStream<T>, Box<dyn std::error::Error + Send + Sync>>;
+
 /// The `Lexer` struct is responsible for performing lexical analysis
 /// (tokenization) on text.
@@ -30,14 +34,14 @@ const BUFFER_SIZE: usize = 1024 * 1024;
 /// # Example
 ///
 /// ```rust
-/// use rune::{Lexer, TokenStream, TokenType};
+/// use rune::{Lexer, LexerError, LexerResult, TokenStream, TokenType};
 ///
-/// fn transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
+/// fn transform(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType>
 /// {
-///     tokens.clone()
+///     Ok(tokens.clone())
 /// }
 ///
-/// let tokens = Lexer::scan_text("Runes += 42", transform).unwrap();
+/// let tokens = Lexer::scan_text::<_, TokenType, LexerError>("Runes += 42", transform).unwrap();
 ///
 /// for token in &tokens
 /// {
@@ -85,19 +89,19 @@ impl Lexer
     /// # Errors
     /// Returns a `LexerError` if the file cannot be opened or read.
     pub fn scan_file<P, F, T>(path: P, transform: F)
-        -> Result<TokenStream<T>, LexerError>
+        -> LexerResult<T>
        where P: AsRef<std::path::Path>,
-             F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
+             F: FnOnce(&TokenStream<TokenType>) -> LexerResult<T>
     {
         let mut cursor = Position::default();
         let mut stream = TokenStream::new();
 
         let input_file = File::open(&path).map_err(|err| {
-            LexerError::new(
+            Box::new(LexerError::new(
                 "Unable to open file for Lexical Analysis.",
                 Span::default(),
                 Some(path.as_ref().to_path_buf()),
-                None).with_source(err)
+                None).with_source(err))
         })?;
 
         let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
@@ -113,13 +117,13 @@ impl Lexer
                 }
                 Err(_) =>
                 {
-                    return Err(LexerError::new("Unable to read line during \
+                    return Err(Box::new(LexerError::new("Unable to read line during \
                                                 Lexical Analysis.",
                                                Span::default(),
                                                Some(path.as_ref()
                                                         .to_string_lossy()
                                                         .to_string()),
-                                               None));
+                                               None)));
                 }
             }
@@ -131,7 +135,7 @@ impl Lexer
             cursor.column = 0;
         }
 
-        Ok(transform(&stream))
+        transform(&stream)
     }
 
     /// Scans a full in-memory string and produces transformed tokens.
@@ -150,9 +154,9 @@ impl Lexer
     /// # Returns
     /// A `Result<TokenStream<T>, LexerError>` where `T` is the transformed token
     /// type, or an error.
-    pub fn scan_text<F, T>(text: &str, transform: F)
-        -> Result<TokenStream<T>, LexerError>
-        where F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
+    pub fn scan_text<F, T, E>(text: &str, transform: F)
+        -> LexerResult<T>
+        where F: FnOnce(&TokenStream<TokenType>) -> LexerResult<T>
     {
         let mut cursor = Position::default();
         let mut stream = TokenStream::new();
@@ -177,7 +181,7 @@ impl Lexer
             stream.pop();
         }
 
-        Ok(transform(&stream))
+        transform(&stream)
     }
 
     /// Internal method that scans a single line of text into tokens.
@@ -246,3 +250,79 @@ fn get_token_type(curr_char: char) -> TokenType
         _ => TokenType::Unknown
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::token::{TokenType, TokenStream}; // Adjust import as needed
+
+    // Basic transform function that does nothing — just clones the original stream.
+    fn passthrough_transform(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType> {
+        Ok(tokens.clone())
+    }
+
+    #[test]
+    fn test_basic_text_scan() {
+        let input = "hello world";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Text, TokenType::Whitespace, TokenType::Text
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_numeric_and_symbol_scan() {
+        let input = "123 + 456";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Numeric,
+            TokenType::Whitespace,
+            TokenType::Symbol,
+            TokenType::Whitespace,
+            TokenType::Numeric
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_multiple_lines() {
+        let input = "abc\n123";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Text,
+            TokenType::Newline,
+            TokenType::Numeric
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_trailing_newline_handling() {
+        let input = "abc";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        // Should NOT end in a Newline since no trailing newline in input
+        assert_ne!(result.lexemes.last().unwrap(), "\n");
+    }
+
+    #[test]
+    fn test_empty_input() {
+        let input = "";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        assert!(result.lexemes.is_empty());
+    }
+}


@@ -15,7 +15,8 @@ struct TestCase<'a>
-fn dummy_transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
+fn dummy_transform(tokens: &TokenStream<TokenType>)
+    -> Result<TokenStream<TokenType>, Box<dyn std::error::Error + Send + Sync>>
 {
     /*
     let mut stream: TokenStream<(TokenType, String)> = TokenStream::default();
@@ -30,7 +31,7 @@ fn dummy_transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
     stream
     */
 
-    tokens.clone()
+    Ok(tokens.clone())
 }
 
 fn write_temp_file(name: &str, content: &str) -> PathBuf
@@ -56,8 +57,9 @@ fn cleanup_temp_file(path: &PathBuf)
 fn basic_lexing()
 {
     let tokens =
-        Lexer::scan_text("magic runes", dummy_transform).expect("Lexer should \
-                                                                 succeed");
+        Lexer::scan_text::<_, TokenType, LexerError>("magic runes",
+                                                     dummy_transform).expect("Lexer should \
+                                                                              succeed");
 
     let tokens = tokens.into_iter()
                        .map(|t| (*t.variant, String::from(t.lexeme)))
@@ -75,7 +77,7 @@ fn basic_lexing()
 fn symbols_and_numbers()
 {
     let tokens =
-        Lexer::scan_text("13 + 37", dummy_transform).expect("Lexer should \
-                                                             succeed");
+        Lexer::scan_text::<_, TokenType, LexerError>("13 + 37", dummy_transform).expect("Lexer should \
+                                                                                         succeed");
 
     let tokens = tokens.into_iter()