Enhanced the LexerError and Documentation of Types

I enhanced `LexerError` so it can now wrap an underlying Rust source `Error`.
This lets us wrap up the possible I/O errors we get when trying
to open a file and read its contents.

I also added some documentation for all the implemented functions.
Myrddin Dundragon 2025-04-16 20:03:15 -04:00
parent 31290cc86f
commit a926e08061
4 changed files with 216 additions and 58 deletions
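
To illustrate the new error-wrapping flow, here is a minimal sketch. The helper names `open_for_lexing` and `report` and the path `example.txt` are made up for the example; `LexerError` and `Span` are the items from the diff below:

    use std::error::Error;
    use std::fs::File;

    // Hypothetical helper: turn an io::Error from File::open into a
    // LexerError that keeps the original error as its source.
    fn open_for_lexing() -> Result<File, LexerError> {
        File::open("example.txt").map_err(|err| {
            LexerError::from_message("Unable to open file for lexical analysis.",
                                     Span::default())
                .with_file("example.txt")
                .with_source(err)
        })
    }

    // The wrapped io::Error stays reachable through Error::source().
    fn report(err: &LexerError) {
        if let Some(cause) = err.source() {
            eprintln!("{}\n  caused by: {}", err, cause);
        }
    }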

View File

@@ -1,59 +1,127 @@
use std::{error::Error, path::PathBuf};

use super::position::Span;

/// Represents an error encountered during lexical analysis.
///
/// `LexerError` contains contextual information about where the error
/// occurred in the source input, an optional offending snippet,
/// the file path (if applicable), and an optional underlying error
/// that triggered the failure.
///
/// It is designed to provide detailed diagnostics for file-based or
/// in-memory parsing and is compatible with error reporting ecosystems.
#[derive(Debug)]
pub struct LexerError {
    /// A human-readable error message.
    pub message: String,

    /// The span where the error occurred.
    pub span: Span,

    /// The file that the error occurred in, if known.
    pub file: Option<PathBuf>,

    /// The source snippet related to the error, if known.
    pub snippet: Option<String>,

    /// An optional underlying error that caused this one.
    pub source: Option<Box<dyn Error>>,
}

impl LexerError {
    /// Creates a new `LexerError` with a message, span, and optional context.
    ///
    /// # Parameters
    /// - `message`: A human-readable explanation of the error.
    /// - `span`: The region in the source where the error occurred.
    /// - `file`: An optional path to the file in which the error occurred.
    /// - `snippet`: An optional problematic input string.
    ///
    /// # Returns
    /// A new instance of `LexerError`.
    pub fn new<S, T>(
        message: S,
        span: Span,
        file: Option<T>,
        snippet: Option<S>,
    ) -> Self
    where
        S: Into<String>,
        T: Into<PathBuf>,
    {
        LexerError {
            message: message.into(),
            span,
            file: file.map(Into::into),
            snippet: snippet.map(Into::into),
            source: None,
        }
    }

    /// Creates a `LexerError` from only a message and span.
    ///
    /// This is useful when file or snippet context is not available.
    pub fn from_message<S>(message: S, span: Span) -> Self
    where
        S: Into<String>,
    {
        Self::new(message, span, None::<PathBuf>, None::<S>)
    }

    /// Attaches a snippet of the offending source code.
    ///
    /// This is helpful for diagnostics and tooling output.
    pub fn with_snippet<S>(mut self, snippet: S) -> Self
    where
        S: Into<String>,
    {
        self.snippet = Some(snippet.into());
        self
    }

    /// Attaches the path of the file in which the error occurred.
    pub fn with_file<T>(mut self, file: T) -> Self
    where
        T: Into<PathBuf>,
    {
        self.file = Some(file.into());
        self
    }

    /// Wraps a source error that caused this `LexerError`.
    ///
    /// This allows you to chain errors for more detailed diagnostics.
    pub fn with_source<E>(mut self, err: E) -> Self
    where
        E: Error + 'static,
    {
        self.source = Some(Box::new(err));
        self
    }
}

impl std::fmt::Display for LexerError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Lexer error at {}", self.span)?;

        if let Some(file) = &self.file {
            write!(f, " in file `{}`", file.display())?;
        }

        write!(f, ": {}", self.message)?;

        if let Some(snippet) = &self.snippet {
            write!(f, "\n --> Snippet: `{}`", snippet)?;
        }

        Ok(())
    }
}

impl Error for LexerError {
    /// Returns the underlying cause of this error, if any.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        self.source.as_ref().map(|e| e.as_ref())
    }
}
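
For a sense of what the new `Display` implementation prints, here is a hedged sketch. The path `example.txt` and the message are made up, and the exact span text depends on `Span`'s own `Display` impl:

    let err = LexerError::from_message("Unexpected character '~'.",
                                       Span::default())
        .with_file("example.txt")
        .with_snippet("~");

    // Rendered via Display, this comes out roughly as:
    //   Lexer error at <span> in file `example.txt`: Unexpected character '~'.
    //    --> Snippet: `~`
    println!("{}", err);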

View File

@@ -72,13 +72,12 @@ impl Lexer
        let mut cursor = Position::default();
        let mut stream = TokenStream::new();

        let input_file = File::open(&path).map_err(|err| {
            LexerError::new(
                "Unable to open file for Lexical Analysis.",
                Span::default(),
                Some(path.as_ref().to_path_buf()),
                None).with_source(err)
        })?;

        let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);

@@ -138,7 +137,8 @@ impl Lexer
    }

    /// Internal: scans a single line of text into tokens.
    fn scan(line: &str, stream: &mut TokenStream<TokenType>,
            cursor: &mut Position)
    {
        for c in line.chars()
        {

View File

@@ -25,6 +25,14 @@ pub struct Span
impl Position
{
    /// Creates a new `Position` with the given line and column.
    ///
    /// # Arguments
    /// * `line` - The line number in the source (0-based).
    /// * `column` - The column number within the line (0-based).
    ///
    /// # Returns
    /// A `Position` representing the specified line and column.
    pub fn new(line: usize, column: usize) -> Self
    {
        Position { line, column }

@@ -51,29 +59,66 @@ impl std::fmt::Display for Position
impl Span
{
    /// Creates a new `Span` from a starting and ending `Position`.
    ///
    /// # Arguments
    /// * `start` - The starting position of the span.
    /// * `end` - The ending position of the span.
    ///
    /// # Returns
    /// A `Span` covering the range from `start` to `end`.
    pub fn new(start: Position, end: Position) -> Self
    {
        Span { start, end }
    }

    /// Creates a `Span` that covers a single position.
    ///
    /// Useful for zero-length spans or pinpointing a specific token or
    /// character.
    ///
    /// # Arguments
    /// * `val` - The position to be used as both start and end.
    ///
    /// # Returns
    /// A `Span` that starts and ends at `val`.
    pub fn with_single(val: Position) -> Self
    {
        Span { start: val,
               end: val }
    }

    /// Merges two spans into one, taking the start of the first
    /// and the end of the second.
    ///
    /// # Arguments
    /// * `a` - The first span.
    /// * `b` - The second span.
    ///
    /// # Returns
    /// A `Span` that starts at `a.start` and ends at `b.end`.
    ///
    /// # Note
    /// Assumes that `a` comes before `b` in source order.
    pub fn merge(a: Span, b: Span) -> Self
    {
        Span { start: a.start,
               end: b.end }
    }

    /// Merges this span with another, producing a new span
    /// from `self.start` to `other.end`.
    ///
    /// # Arguments
    /// * `other` - Another span to merge with.
    ///
    /// # Returns
    /// A new `Span` from the start of `self` to the end of `other`.
    pub fn merge_with(&self, other: Span) -> Span
    {
        Span { start: self.start,
               end: other.end }
    }
}

impl Default for Span
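
As a quick sketch of how the two merge helpers relate (assuming the `Position` and `Span` API shown above; the concrete positions are made up):

    let a = Span::new(Position::new(0, 0), Position::new(0, 5));
    let b = Span::new(Position::new(0, 8), Position::new(0, 12));

    // Covers everything from the start of `a` to the end of `b`;
    // Span::merge(a, b) is the associated-function form of the same idea.
    let merged = a.merge_with(b);

    // Exact output depends on Span's Display impl.
    println!("{}", merged);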

View File

@@ -120,6 +120,10 @@ pub struct TokenStreamIterMut<'a, T>
impl<T> TokenStream<T>
{
    /// Creates a new, empty `TokenStream`.
    ///
    /// # Returns
    /// A `TokenStream` with no tokens.
    pub fn new() -> Self
    {
        TokenStream { lexemes: Vec::new(),

@@ -127,16 +131,31 @@ impl<T> TokenStream<T>
                      locations: Vec::new() }
    }

    /// Returns the number of tokens in the stream.
    ///
    /// # Returns
    /// The number of tokens currently stored.
    pub fn len(&self) -> usize
    {
        self.lexemes.len()
    }

    /// Checks if the token stream is empty.
    ///
    /// # Returns
    /// `true` if there are no tokens, `false` otherwise.
    pub fn is_empty(&self) -> bool
    {
        self.lexemes.is_empty()
    }

    /// Retrieves an immutable reference to the token at the given index.
    ///
    /// # Arguments
    /// * `index` - The position of the token in the stream.
    ///
    /// # Returns
    /// `Some(Token)` if the index is valid, otherwise `None`.
    pub fn get(&self, index: usize) -> Option<Token<'_, T>>
    {
        if index < self.lexemes.len()

@@ -151,12 +170,27 @@ impl<T> TokenStream<T>
        }
    }

    /// Returns an iterator over immutable references to the tokens.
    ///
    /// # Returns
    /// A `TokenStreamIter` iterator for the stream.
    pub fn iter(&self) -> TokenStreamIter<'_, T>
    {
        TokenStreamIter { stream: self,
                          index: 0 }
    }

    /// Retrieves a mutable reference to the token at the given index.
    ///
    /// # Arguments
    /// * `index` - The position of the token in the stream.
    ///
    /// # Returns
    /// `Some(TokenMut)` if the index is valid, otherwise `None`.
    ///
    /// # Safety
    /// Uses `unsafe` to split borrows for individual mutable access
    /// to components of the token without violating Rust's aliasing rules.
    pub fn get_mut(&mut self, index: usize) -> Option<TokenMut<'_, T>>
    {
        if index < self.lexemes.len()

@@ -167,7 +201,6 @@ impl<T> TokenStream<T>
            let variant = &mut self.variants[index] as *mut T;
            let span = &mut self.locations[index] as *mut Span;

            unsafe {
                Some(TokenMut { lexeme: &mut *lexeme.as_mut()
                                            .unwrap()

@@ -182,6 +215,9 @@ impl<T> TokenStream<T>
        }
    }

    /// Clears all tokens from the stream.
    ///
    /// This resets the stream to an empty state.
    pub fn clear(&mut self)
    {
        self.lexemes.clear();

@@ -189,6 +225,12 @@ impl<T> TokenStream<T>
        self.locations.clear();
    }

    /// Pushes a new token onto the end of the stream.
    ///
    /// # Arguments
    /// * `lexeme` - The text of the token.
    /// * `variant` - The token type or value.
    /// * `span` - The location of the token in the source.
    pub fn push(&mut self, lexeme: String, variant: T, span: Span)
    {
        self.lexemes.push(lexeme);

@@ -196,6 +238,10 @@ impl<T> TokenStream<T>
        self.locations.push(span);
    }

    /// Returns a mutable iterator over the tokens in the stream.
    ///
    /// # Returns
    /// A `TokenStreamIterMut` for mutable access to each token component.
    pub fn iter_mut(&mut self) -> TokenStreamIterMut<'_, T>
    {
        TokenStreamIterMut { lexemes: self.lexemes.iter_mut(),

@@ -204,7 +250,6 @@ impl<T> TokenStream<T>
    }
}

impl<'a, T> IntoIterator for &'a TokenStream<T>
{
    type IntoIter = TokenStreamIter<'a, T>;
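
Finally, a small usage sketch of the `TokenStream` API documented above. The `MyToken` enum and the lexemes are made up for illustration:

    #[derive(Debug)]
    enum MyToken { Word, Symbol }

    let mut stream = TokenStream::new();
    stream.push("let".to_string(), MyToken::Word, Span::default());
    stream.push("=".to_string(), MyToken::Symbol, Span::default());

    assert_eq!(stream.len(), 2);
    assert!(!stream.is_empty());

    // Borrow tokens by index or iterate over the whole stream.
    let _first = stream.get(0);
    for _token in stream.iter() {
        // each item is a Token<'_, MyToken> borrowed from the stream
    }

    stream.clear();
    assert!(stream.is_empty());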