Added the new LexerResult type alias to use for error handling.
The lexing functions now return a Result that carries the TokenStream on success. For now the error side is any boxed Error (Box<dyn std::error::Error + Send + Sync>), but later there should be a dedicated lexical-analysis error type.
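In practice, transforms now return the stream wrapped in Ok and callers spell out the token and error types at the call site. Below is a minimal sketch of the new calling convention, pieced together from the doc-comment example and tests in this diff; the crate name `rune`, the turbofish type arguments, and the token fields `variant`/`lexeme` are taken from those snippets and are otherwise unverified.

```rust
use rune::{Lexer, LexerError, LexerResult, TokenStream, TokenType};

// A transform now returns LexerResult<T> instead of a bare TokenStream<T>.
fn passthrough(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType>
{
    Ok(tokens.clone())
}

fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
{
    // The error type is currently the boxed generic error named by LexerResult;
    // the turbofish mirrors the usage in the updated doc example and tests.
    let tokens = Lexer::scan_text::<_, TokenType, LexerError>("Runes += 42", passthrough)?;

    // Tokens expose their variant and lexeme, as used in the integration tests.
    let pairs: Vec<_> = tokens.into_iter()
                              .map(|t| (*t.variant, String::from(t.lexeme)))
                              .collect();
    assert!(!pairs.is_empty());

    Ok(())
}
```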
@@ -46,7 +46,7 @@ impl std::fmt::Display for MarkdownTokenType
 // Define how you want to interpret base tokens
 pub fn transform(input: &TokenStream<TokenType>)
-    -> TokenStream<MarkdownTokenType>
+    -> Result<TokenStream<MarkdownTokenType>, Box<dyn std::error::Error + Send + Sync>>
 {
     let mut output = TokenStream::new();
 
@@ -157,11 +157,11 @@ pub fn transform(input: &TokenStream<TokenType>)
         i += 1;
     }
 
-    output
+    Ok(output)
 }
 
 
-fn main() -> Result<(), Box<dyn std::error::Error>>
+fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>>
 {
     let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
     path.push("examples/example.md");
@@ -29,7 +29,7 @@ pub struct LexerError
     pub snippet: Option<String>,
 
     /// An optional underlying error that caused this one.
-    pub source: Option<Box<dyn Error>>
+    pub source: Option<Box<dyn Error + Send + Sync>>
 }
 
 impl LexerError
@@ -88,7 +88,7 @@ impl LexerError
     ///
     /// This allows you to chain errors for more detailed diagnostics.
     pub fn with_source<E>(mut self, err: E) -> Self
-        where E: Error + 'static
+        where E: Error + Send + Sync + 'static
     {
         self.source = Some(Box::new(err));
         self
@@ -122,6 +122,6 @@ impl Error for LexerError
     /// Returns the underlying cause of this error, if any.
     fn source(&self) -> Option<&(dyn Error + 'static)>
     {
-        self.source.as_ref().map(|e| e.as_ref())
+        self.source.as_deref().map(|e| e as &(dyn Error + 'static))
     }
 }

src/lexer.rs | 110

@@ -12,6 +12,10 @@ use super::token::{TokenStream, TokenType};
 const BUFFER_SIZE: usize = 1024 * 1024;
 
 
+/// The Result returned from a lexing function.
+pub type LexerResult<T> = Result<TokenStream<T>, Box<dyn std::error::Error + Send + Sync>>;
+
+
 
 /// The `Lexer` struct is responsible for performing lexical analysis
 /// (tokenization) on text.
@@ -30,14 +34,14 @@ const BUFFER_SIZE: usize = 1024 * 1024;
 /// # Example
 ///
 /// ```rust
-/// use rune::{Lexer, TokenStream, TokenType};
+/// use rune::{Lexer, LexerError, LexerResult, TokenStream, TokenType};
 ///
-/// fn transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
+/// fn transform(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType>
 /// {
-///     tokens.clone()
+///     Ok(tokens.clone())
 /// }
 ///
-/// let tokens = Lexer::scan_text("Runes += 42", transform).unwrap();
+/// let tokens = Lexer::scan_text::<_, TokenType, LexerError>("Runes += 42", transform).unwrap();
 ///
 /// for token in &tokens
 /// {
@@ -85,19 +89,19 @@ impl Lexer
    /// # Errors
    /// Returns a `LexerError` if the file cannot be opened or read.
    pub fn scan_file<P, F, T>(path: P, transform: F)
-        -> Result<TokenStream<T>, LexerError>
+        -> LexerResult<T>
        where P: AsRef<std::path::Path>,
-              F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
+              F: FnOnce(&TokenStream<TokenType>) -> LexerResult<T>
    {
        let mut cursor = Position::default();
        let mut stream = TokenStream::new();
 
        let input_file = File::open(&path).map_err(|err| {
-            LexerError::new(
+            Box::new(LexerError::new(
                "Unable to open file for Lexical Analysis.",
                Span::default(),
                Some(path.as_ref().to_path_buf()),
-                None).with_source(err)
+                None).with_source(err))
        })?;
 
        let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
@@ -113,13 +117,13 @@ impl Lexer
                }
                Err(_) =>
                {
-                    return Err(LexerError::new("Unable to read line during \
+                    return Err(Box::new(LexerError::new("Unable to read line during \
                                                Lexical Analysis.",
                                               Span::default(),
                                               Some(path.as_ref()
                                                        .to_string_lossy()
                                                        .to_string()),
-                                               None));
+                                               None)));
                }
            }
 
@@ -131,7 +135,7 @@ impl Lexer
            cursor.column = 0;
        }
 
-        Ok(transform(&stream))
+        transform(&stream)
    }
 
    /// Scans a full in-memory string and produces transformed tokens.
@@ -150,9 +154,9 @@ impl Lexer
    /// # Returns
    /// A `Result<TokenStream<T>, LexerError>` where `T` is the transformed token
    /// type, or an error.
-    pub fn scan_text<F, T>(text: &str, transform: F)
-        -> Result<TokenStream<T>, LexerError>
-        where F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
+    pub fn scan_text<F, T, E>(text: &str, transform: F)
+        -> LexerResult<T>
+        where F: FnOnce(&TokenStream<TokenType>) -> LexerResult<T>
    {
        let mut cursor = Position::default();
        let mut stream = TokenStream::new();
@@ -177,7 +181,7 @@ impl Lexer
            stream.pop();
        }
 
-        Ok(transform(&stream))
+        transform(&stream)
    }
 
    /// Internal method that scans a single line of text into tokens.
@@ -246,3 +250,79 @@ fn get_token_type(curr_char: char) -> TokenType
        _ => TokenType::Unknown
    }
 }
+
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::token::{TokenType, TokenStream}; // Adjust import as needed
+
+    // Basic transform function that does nothing — just clones the original stream.
+    fn passthrough_transform(tokens: &TokenStream<TokenType>) -> LexerResult<TokenType> {
+        Ok(tokens.clone())
+    }
+
+    #[test]
+    fn test_basic_text_scan() {
+        let input = "hello world";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Text, TokenType::Whitespace, TokenType::Text
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_numeric_and_symbol_scan() {
+        let input = "123 + 456";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Numeric,
+            TokenType::Whitespace,
+            TokenType::Symbol,
+            TokenType::Whitespace,
+            TokenType::Numeric
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_multiple_lines() {
+        let input = "abc\n123";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        let types: Vec<_> = result.variants.iter().cloned().collect();
+        let expected = vec![
+            TokenType::Text,
+            TokenType::Newline,
+            TokenType::Numeric
+        ];
+
+        assert_eq!(types, expected);
+    }
+
+    #[test]
+    fn test_trailing_newline_handling() {
+        let input = "abc";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        // Should NOT end in a Newline since no trailing newline in input
+        assert_ne!(result.lexemes.last().unwrap(), "\n");
+    }
+
+    #[test]
+    fn test_empty_input() {
+        let input = "";
+        let result = Lexer::scan_text::<_, TokenType, LexerError>(input, passthrough_transform).unwrap();
+
+        assert!(result.lexemes.is_empty());
+    }
+}
@@ -15,7 +15,8 @@ struct TestCase<'a>
 
 
 
-fn dummy_transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
+fn dummy_transform(tokens: &TokenStream<TokenType>)
+    -> Result<TokenStream<TokenType>, Box<dyn std::error::Error + Send + Sync>>
 {
    /*
    let mut stream: TokenStream<(TokenType, String)> = TokenStream::default();
@@ -30,7 +31,7 @@ fn dummy_transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
 
    stream
    */
-    tokens.clone()
+    Ok(tokens.clone())
 }
 
 fn write_temp_file(name: &str, content: &str) -> PathBuf
@@ -56,8 +57,9 @@ fn cleanup_temp_file(path: &PathBuf)
 fn basic_lexing()
 {
    let tokens =
-        Lexer::scan_text("magic runes", dummy_transform).expect("Lexer should \
-                                                                 succeed");
+        Lexer::scan_text::<_, TokenType, LexerError>("magic runes",
+                                                     dummy_transform).expect("Lexer should \
+                                                                              succeed");
 
    let tokens = tokens.into_iter()
                       .map(|t| (*t.variant, String::from(t.lexeme)))
@@ -75,7 +77,7 @@ fn basic_lexing()
 fn symbols_and_numbers()
 {
    let tokens =
-        Lexer::scan_text("13 + 37", dummy_transform).expect("Lexer should \
+        Lexer::scan_text::<_, TokenType, LexerError>("13 + 37", dummy_transform).expect("Lexer should \
                                                             succeed");
 
    let tokens = tokens.into_iter()