Enhance LexerError and add documentation for types
I enhanced `LexerError` so that it can now wrap an underlying Rust source error. This lets us wrap up the possible I/O errors we get when trying to open a file and read its contents. I also added documentation for all the implemented functions.
This commit is contained in:
parent
31290cc86f
commit
a926e08061
138
src/error.rs
138
src/error.rs
@ -1,59 +1,127 @@
|
|||||||
|
use std::{error::Error, path::PathBuf};
|
||||||
use super::position::Span;
|
use super::position::Span;
|
||||||
|
|
||||||
|
|
||||||
|
/// Represents an error encountered during lexical analysis.
|
||||||
/// An error that has occured during lexigraphical analysis.
|
///
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
/// `LexerError` contains contextual information about where the error
|
||||||
pub struct LexerError
|
/// occurred in the source input, an optional offending snippet,
|
||||||
{
|
/// the file path (if applicable), and an optional underlying error
|
||||||
|
/// that triggered the failure.
|
||||||
|
///
|
||||||
|
/// It is designed to provide detailed diagnostics for file-based or
|
||||||
|
/// in-memory parsing and is compatible with error reporting ecosystems.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct LexerError {
|
||||||
/// A human-readable error message.
|
/// A human-readable error message.
|
||||||
pub message: String,
|
pub message: String,
|
||||||
|
|
||||||
/// The start and end of where the error is located in the file.
|
/// The span where the error occurred.
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
|
|
||||||
/// The file that the error occured within.
|
/// The file that the error occurred in, if known.
|
||||||
pub file: Option<std::path::PathBuf>,
|
pub file: Option<PathBuf>,
|
||||||
|
|
||||||
/// The problematic string (optional).
|
/// The source snippet related to the error, if known.
|
||||||
pub snippet: Option<String>
|
pub snippet: Option<String>,
|
||||||
|
|
||||||
|
/// An optional underlying error that caused this one.
|
||||||
|
pub source: Option<Box<dyn Error>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl LexerError {
|
||||||
|
/// Creates a new `LexerError` with a message, span, and optional context.
|
||||||
impl LexerError
|
///
|
||||||
|
/// # Parameters
|
||||||
|
/// - `message`: A human-readable explanation of the error.
|
||||||
|
/// - `span`: The region in the source where the error occurred.
|
||||||
|
/// - `file`: An optional path to the file in which the error occurred.
|
||||||
|
/// - `snippet`: An optional problematic input string.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A new instance of `LexerError`.
|
||||||
|
pub fn new<S, T>(
|
||||||
|
message: S,
|
||||||
|
span: Span,
|
||||||
|
file: Option<T>,
|
||||||
|
snippet: Option<S>,
|
||||||
|
) -> Self
|
||||||
|
where
|
||||||
|
S: Into<String>,
|
||||||
|
T: Into<PathBuf>,
|
||||||
{
|
{
|
||||||
pub fn new<S, T>(message: S, span: Span, file: Option<T>,
|
LexerError {
|
||||||
snippet: Option<S>)
|
message: message.into(),
|
||||||
-> Self
|
|
||||||
where S: Into<String>,
|
|
||||||
T: Into<std::path::PathBuf>
|
|
||||||
{
|
|
||||||
LexerError { message: message.into(),
|
|
||||||
span,
|
span,
|
||||||
file: file.map(|t| t.into()),
|
file: file.map(Into::into),
|
||||||
snippet: snippet.map(|s| s.into()) }
|
snippet: snippet.map(Into::into),
|
||||||
|
source: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for LexerError
|
/// Creates a `LexerError` from only a message and span.
|
||||||
|
///
|
||||||
|
/// This is useful when file or snippet context is not available.
|
||||||
|
pub fn from_message<S>(message: S, span: Span) -> Self
|
||||||
|
where
|
||||||
|
S: Into<String>,
|
||||||
{
|
{
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result
|
Self::new(message, span, None::<PathBuf>, None::<S>)
|
||||||
{
|
|
||||||
match &self.snippet
|
|
||||||
{
|
|
||||||
Some(snippet) =>
|
|
||||||
{
|
|
||||||
write!(f,
|
|
||||||
"LexerError at {}: {} (snippet: '{}')",
|
|
||||||
self.span, self.message, snippet)
|
|
||||||
}
|
}
|
||||||
None =>
|
|
||||||
|
/// Attaches a snippet of the offending source code.
|
||||||
|
///
|
||||||
|
/// This is helpful for diagnostics and tooling output.
|
||||||
|
pub fn with_snippet<S>(mut self, snippet: S) -> Self
|
||||||
|
where
|
||||||
|
S: Into<String>,
|
||||||
{
|
{
|
||||||
write!(f, "LexerError at {}: {}", self.span, self.message)
|
self.snippet = Some(snippet.into());
|
||||||
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attaches the path of the file in which the error occurred.
|
||||||
|
pub fn with_file<T>(mut self, file: T) -> Self
|
||||||
|
where
|
||||||
|
T: Into<PathBuf>,
|
||||||
|
{
|
||||||
|
self.file = Some(file.into());
|
||||||
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wraps a source error that caused this `LexerError`.
|
||||||
|
///
|
||||||
|
/// This allows you to chain errors for more detailed diagnostics.
|
||||||
|
pub fn with_source<E>(mut self, err: E) -> Self
|
||||||
|
where
|
||||||
|
E: Error + 'static,
|
||||||
|
{
|
||||||
|
self.source = Some(Box::new(err));
|
||||||
|
self
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::error::Error for LexerError {}
|
impl std::fmt::Display for LexerError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "Lexer error at {}", self.span)?;
|
||||||
|
|
||||||
|
if let Some(file) = &self.file {
|
||||||
|
write!(f, " in file `{}`", file.display())?;
|
||||||
|
}
|
||||||
|
|
||||||
|
write!(f, ": {}", self.message)?;
|
||||||
|
|
||||||
|
if let Some(snippet) = &self.snippet {
|
||||||
|
write!(f, "\n --> Snippet: `{}`", snippet)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for LexerError {
|
||||||
|
/// Returns the underlying cause of this error, if any.
|
||||||
|
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||||
|
self.source.as_ref().map(|e| e.as_ref())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
12
src/lexer.rs
12
src/lexer.rs
@ -72,13 +72,12 @@ impl Lexer
|
|||||||
let mut cursor = Position::default();
|
let mut cursor = Position::default();
|
||||||
let mut stream = TokenStream::new();
|
let mut stream = TokenStream::new();
|
||||||
|
|
||||||
let input_file = File::open(&path).map_err(|_error| {
|
let input_file = File::open(&path).map_err(|err| {
|
||||||
LexerError::new(
|
LexerError::new(
|
||||||
"Unable to open file for Lexigraphical Analysis.",
|
"Unable to open file for Lexical Analysis.",
|
||||||
Span::default(),
|
Span::default(),
|
||||||
Some(path.as_ref().to_string_lossy().to_string()),
|
Some(path.as_ref().to_path_buf()),
|
||||||
None,
|
None).with_source(err)
|
||||||
)
|
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
|
let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
|
||||||
@ -138,7 +137,8 @@ impl Lexer
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Internal: scans a single line of text into tokens.
|
/// Internal: scans a single line of text into tokens.
|
||||||
fn scan(line: &str, stream: &mut TokenStream<TokenType>, cursor: &mut Position)
|
fn scan(line: &str, stream: &mut TokenStream<TokenType>,
|
||||||
|
cursor: &mut Position)
|
||||||
{
|
{
|
||||||
for c in line.chars()
|
for c in line.chars()
|
||||||
{
|
{
|
||||||
|
@ -25,6 +25,14 @@ pub struct Span
|
|||||||
|
|
||||||
impl Position
|
impl Position
|
||||||
{
|
{
|
||||||
|
/// Creates a new `Position` with the given line and column.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `line` - The line number in the source (0-based).
|
||||||
|
/// * `column` - The column number within the line (0-based).
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A `Position` representing the specified line and column.
|
||||||
pub fn new(line: usize, column: usize) -> Self
|
pub fn new(line: usize, column: usize) -> Self
|
||||||
{
|
{
|
||||||
Position { line, column }
|
Position { line, column }
|
||||||
@ -51,28 +59,65 @@ impl std::fmt::Display for Position
|
|||||||
|
|
||||||
impl Span
|
impl Span
|
||||||
{
|
{
|
||||||
|
/// Creates a new `Span` from a starting and ending `Position`.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `start` - The starting position of the span.
|
||||||
|
/// * `end` - The ending position of the span.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A `Span` covering the range from `start` to `end`.
|
||||||
pub fn new(start: Position, end: Position) -> Self
|
pub fn new(start: Position, end: Position) -> Self
|
||||||
{
|
{
|
||||||
Span { start, end }
|
Span { start, end }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a `Span` that covers a single position.
|
||||||
|
///
|
||||||
|
/// Useful for zero-length spans or pinpointing a specific token or
|
||||||
|
/// character.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `val` - The position to be used as both start and end.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A `Span` that starts and ends at `val`.
|
||||||
pub fn with_single(val: Position) -> Self
|
pub fn with_single(val: Position) -> Self
|
||||||
{
|
{
|
||||||
Span { start: val,
|
Span { start: val,
|
||||||
end: val }
|
end: val }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Merges two spans into one, taking the start of the first
|
||||||
|
/// and the end of the second.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `a` - The first span.
|
||||||
|
/// * `b` - The second span.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A `Span` that starts at `a.start` and ends at `b.end`.
|
||||||
|
///
|
||||||
|
/// # Note
|
||||||
|
/// Assumes that `a` comes before `b` in source order.
|
||||||
pub fn merge(a: Span, b: Span) -> Self
|
pub fn merge(a: Span, b: Span) -> Self
|
||||||
{
|
{
|
||||||
Span { start: a.start,
|
Span { start: a.start,
|
||||||
end: b.end }
|
end: b.end }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn merge_with(&self, other: Span) -> Span {
|
/// Merges this span with another, producing a new span
|
||||||
Span {
|
/// from `self.start` to `other.end`.
|
||||||
start: self.start,
|
///
|
||||||
end: other.end,
|
/// # Arguments
|
||||||
}
|
/// * `other` - Another span to merge with.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A new `Span` from the start of `self` to the end of `other`.
|
||||||
|
pub fn merge_with(&self, other: Span) -> Span
|
||||||
|
{
|
||||||
|
Span { start: self.start,
|
||||||
|
end: other.end }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
49
src/token.rs
49
src/token.rs
@ -120,6 +120,10 @@ pub struct TokenStreamIterMut<'a, T>
|
|||||||
|
|
||||||
impl<T> TokenStream<T>
|
impl<T> TokenStream<T>
|
||||||
{
|
{
|
||||||
|
/// Creates a new, empty `TokenStream`.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A `TokenStream` with no tokens.
|
||||||
pub fn new() -> Self
|
pub fn new() -> Self
|
||||||
{
|
{
|
||||||
TokenStream { lexemes: Vec::new(),
|
TokenStream { lexemes: Vec::new(),
|
||||||
@ -127,16 +131,31 @@ impl<T> TokenStream<T>
|
|||||||
locations: Vec::new() }
|
locations: Vec::new() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the number of tokens in the stream.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// The number of tokens currently stored.
|
||||||
pub fn len(&self) -> usize
|
pub fn len(&self) -> usize
|
||||||
{
|
{
|
||||||
self.lexemes.len()
|
self.lexemes.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks if the token stream is empty.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// `true` if there are no tokens, `false` otherwise.
|
||||||
pub fn is_empty(&self) -> bool
|
pub fn is_empty(&self) -> bool
|
||||||
{
|
{
|
||||||
self.lexemes.is_empty()
|
self.lexemes.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Retrieves an immutable reference to the token at the given index.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `index` - The position of the token in the stream.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// `Some(Token)` if the index is valid, otherwise `None`.
|
||||||
pub fn get(&self, index: usize) -> Option<Token<'_, T>>
|
pub fn get(&self, index: usize) -> Option<Token<'_, T>>
|
||||||
{
|
{
|
||||||
if index < self.lexemes.len()
|
if index < self.lexemes.len()
|
||||||
@ -151,12 +170,27 @@ impl<T> TokenStream<T>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns an iterator over immutable references to the tokens.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A `TokenStreamIter` iterator for the stream.
|
||||||
pub fn iter(&self) -> TokenStreamIter<'_, T>
|
pub fn iter(&self) -> TokenStreamIter<'_, T>
|
||||||
{
|
{
|
||||||
TokenStreamIter { stream: self,
|
TokenStreamIter { stream: self,
|
||||||
index: 0 }
|
index: 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Retrieves a mutable reference to the token at the given index.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `index` - The position of the token in the stream.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// `Some(TokenMut)` if the index is valid, otherwise `None`.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
/// Uses `unsafe` to split borrows for individual mutable access
|
||||||
|
/// to components of the token without violating Rust’s aliasing rules.
|
||||||
pub fn get_mut(&mut self, index: usize) -> Option<TokenMut<'_, T>>
|
pub fn get_mut(&mut self, index: usize) -> Option<TokenMut<'_, T>>
|
||||||
{
|
{
|
||||||
if index < self.lexemes.len()
|
if index < self.lexemes.len()
|
||||||
@ -167,7 +201,6 @@ impl<T> TokenStream<T>
|
|||||||
let variant = &mut self.variants[index] as *mut T;
|
let variant = &mut self.variants[index] as *mut T;
|
||||||
let span = &mut self.locations[index] as *mut Span;
|
let span = &mut self.locations[index] as *mut Span;
|
||||||
|
|
||||||
// Convert &mut String to &mut str safely.
|
|
||||||
unsafe {
|
unsafe {
|
||||||
Some(TokenMut { lexeme: &mut *lexeme.as_mut()
|
Some(TokenMut { lexeme: &mut *lexeme.as_mut()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
@ -182,6 +215,9 @@ impl<T> TokenStream<T>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Clears all tokens from the stream.
|
||||||
|
///
|
||||||
|
/// This resets the stream to an empty state.
|
||||||
pub fn clear(&mut self)
|
pub fn clear(&mut self)
|
||||||
{
|
{
|
||||||
self.lexemes.clear();
|
self.lexemes.clear();
|
||||||
@ -189,6 +225,12 @@ impl<T> TokenStream<T>
|
|||||||
self.locations.clear();
|
self.locations.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pushes a new token onto the end of the stream.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `lexeme` - The text of the token.
|
||||||
|
/// * `variant` - The token type or value.
|
||||||
|
/// * `span` - The location of the token in the source.
|
||||||
pub fn push(&mut self, lexeme: String, variant: T, span: Span)
|
pub fn push(&mut self, lexeme: String, variant: T, span: Span)
|
||||||
{
|
{
|
||||||
self.lexemes.push(lexeme);
|
self.lexemes.push(lexeme);
|
||||||
@ -196,6 +238,10 @@ impl<T> TokenStream<T>
|
|||||||
self.locations.push(span);
|
self.locations.push(span);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a mutable iterator over the tokens in the stream.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A `TokenStreamIterMut` for mutable access to each token component.
|
||||||
pub fn iter_mut(&mut self) -> TokenStreamIterMut<'_, T>
|
pub fn iter_mut(&mut self) -> TokenStreamIterMut<'_, T>
|
||||||
{
|
{
|
||||||
TokenStreamIterMut { lexemes: self.lexemes.iter_mut(),
|
TokenStreamIterMut { lexemes: self.lexemes.iter_mut(),
|
||||||
@ -204,7 +250,6 @@ impl<T> TokenStream<T>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
impl<'a, T> IntoIterator for &'a TokenStream<T>
|
impl<'a, T> IntoIterator for &'a TokenStream<T>
|
||||||
{
|
{
|
||||||
type IntoIter = TokenStreamIter<'a, T>;
|
type IntoIter = TokenStreamIter<'a, T>;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user