Enhanced the LexerError and Documentation of Types

Enhanced `LexerError` so that it now wraps an underlying Rust source `Error`.
This lets us wrap up the possible I/O errors we get when trying
to open a file and read its contents.

Also added documentation for all of the implemented functions.
This commit is contained in:
Myrddin Dundragon 2025-04-16 20:03:15 -04:00
parent 31290cc86f
commit a926e08061
4 changed files with 216 additions and 58 deletions

View File

@ -1,59 +1,127 @@
use std::{error::Error, path::PathBuf};
use super::position::Span;
/// An error that has occured during lexigraphical analysis.
#[derive(Debug, Clone, PartialEq)]
pub struct LexerError
{
/// Represents an error encountered during lexical analysis.
///
/// `LexerError` contains contextual information about where the error
/// occurred in the source input, an optional offending snippet,
/// the file path (if applicable), and an optional underlying error
/// that triggered the failure.
///
/// It is designed to provide detailed diagnostics for file-based or
/// in-memory parsing and is compatible with error reporting ecosystems.
#[derive(Debug)]
pub struct LexerError {
/// A human-readable error message.
pub message: String,
/// The start and end of where the error is located in the file.
/// The span where the error occurred.
pub span: Span,
/// The file that the error occured within.
pub file: Option<std::path::PathBuf>,
/// The file that the error occurred in, if known.
pub file: Option<PathBuf>,
/// The problematic string (optional).
pub snippet: Option<String>
/// The source snippet related to the error, if known.
pub snippet: Option<String>,
/// An optional underlying error that caused this one.
pub source: Option<Box<dyn Error>>,
}
impl LexerError
impl LexerError {
/// Creates a new `LexerError` with a message, span, and optional context.
///
/// # Parameters
/// - `message`: A human-readable explanation of the error.
/// - `span`: The region in the source where the error occurred.
/// - `file`: An optional path to the file in which the error occurred.
/// - `snippet`: An optional problematic input string.
///
/// # Returns
/// A new instance of `LexerError`.
pub fn new<S, T>(
message: S,
span: Span,
file: Option<T>,
snippet: Option<S>,
) -> Self
where
S: Into<String>,
T: Into<PathBuf>,
{
pub fn new<S, T>(message: S, span: Span, file: Option<T>,
snippet: Option<S>)
-> Self
where S: Into<String>,
T: Into<std::path::PathBuf>
{
LexerError { message: message.into(),
LexerError {
message: message.into(),
span,
file: file.map(|t| t.into()),
snippet: snippet.map(|s| s.into()) }
file: file.map(Into::into),
snippet: snippet.map(Into::into),
source: None,
}
}
impl std::fmt::Display for LexerError
/// Creates a `LexerError` from only a message and span.
///
/// This is useful when file or snippet context is not available.
pub fn from_message<S>(message: S, span: Span) -> Self
where
S: Into<String>,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result
{
match &self.snippet
{
Some(snippet) =>
{
write!(f,
"LexerError at {}: {} (snippet: '{}')",
self.span, self.message, snippet)
Self::new(message, span, None::<PathBuf>, None::<S>)
}
None =>
/// Attaches a snippet of the offending source code.
///
/// This is helpful for diagnostics and tooling output.
pub fn with_snippet<S>(mut self, snippet: S) -> Self
where
S: Into<String>,
{
write!(f, "LexerError at {}: {}", self.span, self.message)
self.snippet = Some(snippet.into());
self
}
/// Attaches the path of the file in which the error occurred.
pub fn with_file<T>(mut self, file: T) -> Self
where
T: Into<PathBuf>,
{
self.file = Some(file.into());
self
}
/// Wraps a source error that caused this `LexerError`.
///
/// This allows you to chain errors for more detailed diagnostics.
pub fn with_source<E>(mut self, err: E) -> Self
where
E: Error + 'static,
{
self.source = Some(Box::new(err));
self
}
}
impl std::error::Error for LexerError {}
impl std::fmt::Display for LexerError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Lexer error at {}", self.span)?;
if let Some(file) = &self.file {
write!(f, " in file `{}`", file.display())?;
}
write!(f, ": {}", self.message)?;
if let Some(snippet) = &self.snippet {
write!(f, "\n --> Snippet: `{}`", snippet)?;
}
Ok(())
}
}
impl Error for LexerError {
/// Returns the underlying cause of this error, if any.
fn source(&self) -> Option<&(dyn Error + 'static)> {
self.source.as_ref().map(|e| e.as_ref())
}
}

View File

@ -72,13 +72,12 @@ impl Lexer
let mut cursor = Position::default();
let mut stream = TokenStream::new();
let input_file = File::open(&path).map_err(|_error| {
let input_file = File::open(&path).map_err(|err| {
LexerError::new(
"Unable to open file for Lexigraphical Analysis.",
"Unable to open file for Lexical Analysis.",
Span::default(),
Some(path.as_ref().to_string_lossy().to_string()),
None,
)
Some(path.as_ref().to_path_buf()),
None).with_source(err)
})?;
let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
@ -138,7 +137,8 @@ impl Lexer
}
/// Internal: scans a single line of text into tokens.
fn scan(line: &str, stream: &mut TokenStream<TokenType>, cursor: &mut Position)
fn scan(line: &str, stream: &mut TokenStream<TokenType>,
cursor: &mut Position)
{
for c in line.chars()
{

View File

@ -25,6 +25,14 @@ pub struct Span
impl Position
{
/// Creates a new `Position` with the given line and column.
///
/// # Arguments
/// * `line` - The line number in the source (0-based).
/// * `column` - The column number within the line (0-based).
///
/// # Returns
/// A `Position` representing the specified line and column.
pub fn new(line: usize, column: usize) -> Self
{
Position { line, column }
@ -51,28 +59,65 @@ impl std::fmt::Display for Position
impl Span
{
/// Creates a new `Span` from a starting and ending `Position`.
///
/// # Arguments
/// * `start` - The starting position of the span.
/// * `end` - The ending position of the span.
///
/// # Returns
/// A `Span` covering the range from `start` to `end`.
pub fn new(start: Position, end: Position) -> Self
{
Span { start, end }
}
/// Creates a `Span` that covers a single position.
///
/// Useful for zero-length spans or pinpointing a specific token or
/// character.
///
/// # Arguments
/// * `val` - The position to be used as both start and end.
///
/// # Returns
/// A `Span` that starts and ends at `val`.
pub fn with_single(val: Position) -> Self
{
Span { start: val,
end: val }
}
/// Merges two spans into one, taking the start of the first
/// and the end of the second.
///
/// # Arguments
/// * `a` - The first span.
/// * `b` - The second span.
///
/// # Returns
/// A `Span` that starts at `a.start` and ends at `b.end`.
///
/// # Note
/// Assumes that `a` comes before `b` in source order.
pub fn merge(a: Span, b: Span) -> Self
{
Span { start: a.start,
end: b.end }
}
pub fn merge_with(&self, other: Span) -> Span {
Span {
start: self.start,
end: other.end,
}
/// Merges this span with another, producing a new span
/// from `self.start` to `other.end`.
///
/// # Arguments
/// * `other` - Another span to merge with.
///
/// # Returns
/// A new `Span` from the start of `self` to the end of `other`.
pub fn merge_with(&self, other: Span) -> Span
{
Span { start: self.start,
end: other.end }
}
}

View File

@ -120,6 +120,10 @@ pub struct TokenStreamIterMut<'a, T>
impl<T> TokenStream<T>
{
/// Creates a new, empty `TokenStream`.
///
/// # Returns
/// A `TokenStream` with no tokens.
pub fn new() -> Self
{
TokenStream { lexemes: Vec::new(),
@ -127,16 +131,31 @@ impl<T> TokenStream<T>
locations: Vec::new() }
}
/// Returns the number of tokens in the stream.
///
/// # Returns
/// The number of tokens currently stored.
pub fn len(&self) -> usize
{
self.lexemes.len()
}
/// Checks if the token stream is empty.
///
/// # Returns
/// `true` if there are no tokens, `false` otherwise.
pub fn is_empty(&self) -> bool
{
self.lexemes.is_empty()
}
/// Retrieves an immutable reference to the token at the given index.
///
/// # Arguments
/// * `index` - The position of the token in the stream.
///
/// # Returns
/// `Some(Token)` if the index is valid, otherwise `None`.
pub fn get(&self, index: usize) -> Option<Token<'_, T>>
{
if index < self.lexemes.len()
@ -151,12 +170,27 @@ impl<T> TokenStream<T>
}
}
/// Returns an iterator over immutable references to the tokens.
///
/// # Returns
/// A `TokenStreamIter` iterator for the stream.
pub fn iter(&self) -> TokenStreamIter<'_, T>
{
TokenStreamIter { stream: self,
index: 0 }
}
/// Retrieves a mutable reference to the token at the given index.
///
/// # Arguments
/// * `index` - The position of the token in the stream.
///
/// # Returns
/// `Some(TokenMut)` if the index is valid, otherwise `None`.
///
/// # Safety
/// Uses `unsafe` to split borrows for individual mutable access
/// to components of the token without violating Rusts aliasing rules.
pub fn get_mut(&mut self, index: usize) -> Option<TokenMut<'_, T>>
{
if index < self.lexemes.len()
@ -167,7 +201,6 @@ impl<T> TokenStream<T>
let variant = &mut self.variants[index] as *mut T;
let span = &mut self.locations[index] as *mut Span;
// Convert &mut String to &mut str safely.
unsafe {
Some(TokenMut { lexeme: &mut *lexeme.as_mut()
.unwrap()
@ -182,6 +215,9 @@ impl<T> TokenStream<T>
}
}
/// Clears all tokens from the stream.
///
/// This resets the stream to an empty state.
pub fn clear(&mut self)
{
self.lexemes.clear();
@ -189,6 +225,12 @@ impl<T> TokenStream<T>
self.locations.clear();
}
/// Pushes a new token onto the end of the stream.
///
/// # Arguments
/// * `lexeme` - The text of the token.
/// * `variant` - The token type or value.
/// * `span` - The location of the token in the source.
pub fn push(&mut self, lexeme: String, variant: T, span: Span)
{
self.lexemes.push(lexeme);
@ -196,6 +238,10 @@ impl<T> TokenStream<T>
self.locations.push(span);
}
/// Returns a mutable iterator over the tokens in the stream.
///
/// # Returns
/// A `TokenStreamIterMut` for mutable access to each token component.
pub fn iter_mut(&mut self) -> TokenStreamIterMut<'_, T>
{
TokenStreamIterMut { lexemes: self.lexemes.iter_mut(),
@ -204,7 +250,6 @@ impl<T> TokenStream<T>
}
}
impl<'a, T> IntoIterator for &'a TokenStream<T>
{
type IntoIter = TokenStreamIter<'a, T>;