Compare commits

...

5 Commits

SHA1  Message  Date
693ff20224 Fixing how the Lexer handles text.
When scanning a file, every line ends with a '\n' newline
character, but when the text is given directly to the lexer via the
scan_text function, no newline should be appended at the end if there
was none in the original input.
2025-05-05 18:17:32 -04:00
34a579332d Added a Lookahead iterator.
This adds a Lookahead iterator so that, while parsing, it is easy
to peek ahead as far as the parser needs. Basic parsers may not
need any lookahead, but many parsers use two-token lookahead. I've even
seen some with three.
2025-05-05 18:17:32 -04:00
cb882ceb84 Changing test names. 2025-05-05 18:17:32 -04:00
e604bf172b [#4] The initial AST.
This is the initial design of the AST. It is built in a data-oriented
style. It still needs iterators over the AST and the optimized AST,
as well as some more transformation functions.
2025-05-05 18:17:32 -04:00
f5780f50c2 Just some basic updating and cleaning up.
- Added comments.
- Ran cargo fmt.
- Updated the versioning.
2025-05-05 18:17:32 -04:00
11 changed files with 925 additions and 244 deletions

Cargo.lock (generated)

@@ -4,4 +4,4 @@ version = 4
 [[package]]
 name = "rune"
-version = "0.2.0"
+version = "0.3.0"

@@ -1,6 +1,6 @@
 [package]
 name = "rune"
-version = "0.2.0"
+version = "0.3.0"
 edition = "2021"
 description = "A lexical analysis library."
 repository = "/myrddin/rune"

@@ -31,5 +31,5 @@ Then add this to your Cargo.toml file.
 ```toml
 [dependencies]
-rune = { version = "0.2.0", registry = "cybermages" }
+rune = { version = "0.3.0", registry = "cybermages" }
 ```

@@ -5,133 +5,159 @@ use rune::{Lexer, Span, TokenStream, TokenType};
(This hunk only reformats the example from K&R to Allman brace style; the logic is unchanged. It is shown below as it reads after the change.)

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MarkdownTokenType
{
   Heading(u8),
   EmphasisStart,
   EmphasisEnd,
   StrongStart,
   StrongEnd,
   CodeSpan,
   Text,
   Symbol,
   Whitespace,
   Newline,
   Unknown
}

impl std::fmt::Display for MarkdownTokenType
{
   fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
   {
      match self
      {
         MarkdownTokenType::Heading(level) => write!(f, "Heading({})", level),
         MarkdownTokenType::EmphasisStart => write!(f, "EmphasisStart"),
         MarkdownTokenType::EmphasisEnd => write!(f, "EmphasisEnd"),
         MarkdownTokenType::StrongStart => write!(f, "StrongStart"),
         MarkdownTokenType::StrongEnd => write!(f, "StrongEnd"),
         MarkdownTokenType::CodeSpan => write!(f, "CodeSpan"),
         MarkdownTokenType::Text => write!(f, "Text"),
         MarkdownTokenType::Symbol => write!(f, "Symbol"),
         MarkdownTokenType::Whitespace => write!(f, "Whitespace"),
         MarkdownTokenType::Newline => write!(f, "Newline"),
         MarkdownTokenType::Unknown => write!(f, "Unknown")
      }
   }
}

// Define how you want to interpret base tokens
pub fn transform(input: &TokenStream<TokenType>)
   -> TokenStream<MarkdownTokenType>
{
   let mut output = TokenStream::new();
   let mut i = 0;

   while i < input.len()
   {
      let token = input.get(i).unwrap(); // safe due to bounds check above

      match token.variant
      {
         TokenType::Symbol if token.lexeme == "#" =>
         {
            // Count consecutive #s for heading level
            let mut level = 1;
            let mut span = token.span.clone();

            while i + 1 < input.len()
            {
               let next = input.get(i + 1).unwrap();
               if *next.variant == TokenType::Symbol && next.lexeme == "#"
               {
                  level += 1;
                  span.end = next.span.end;
                  i += 1;
               }
               else
               {
                  break;
               }
            }

            output.push(token.lexeme.repeat(level),
                        MarkdownTokenType::Heading(level as u8),
                        span);
         }

         TokenType::Symbol if token.lexeme == "*" =>
         {
            // Look ahead to see if it's strong (**) or emphasis (*)
            if i + 1 < input.len()
            {
               let next = input.get(i + 1).unwrap();
               if *next.variant == TokenType::Symbol && next.lexeme == "*"
               {
                  output.push("**".to_string(),
                              MarkdownTokenType::StrongStart,
                              Span::merge(*token.span, *next.span));
                  i += 1; // skip the second '*'
               }
               else
               {
                  output.push("*".to_string(),
                              MarkdownTokenType::EmphasisStart,
                              token.span.clone());
               }
            }
            else
            {
               output.push("*".to_string(),
                           MarkdownTokenType::EmphasisStart,
                           token.span.clone());
            }
         }

         TokenType::Symbol if token.lexeme == "`" =>
         {
            output.push(token.lexeme.to_string(),
                        MarkdownTokenType::CodeSpan,
                        token.span.clone());
         }

         TokenType::Text =>
         {
            output.push(token.lexeme.to_string(),
                        MarkdownTokenType::Text,
                        token.span.clone());
         }

         TokenType::Symbol =>
         {
            output.push(token.lexeme.to_string(),
                        MarkdownTokenType::Symbol,
                        token.span.clone());
         }

         TokenType::Whitespace =>
         {
            output.push(token.lexeme.to_string(),
                        MarkdownTokenType::Whitespace,
                        token.span.clone());
         }

         TokenType::Newline =>
         {
            output.push(token.lexeme.to_string(),
                        MarkdownTokenType::Newline,
                        token.span.clone());
         }

         _ =>
         {
            output.push(token.lexeme.to_string(),
                        MarkdownTokenType::Unknown,
                        token.span.clone());
         }
      }

      i += 1;
   }

   output
}
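Usage note (not part of the diff): a minimal sketch of how this example's `transform` might be driven end to end through `Lexer::scan_text`; the `main` function and the input string below are illustrative only.

```rust
use rune::Lexer;

fn main()
{
   // Tokenize an in-memory Markdown snippet and lift the base tokens into
   // MarkdownTokenType via the transform function defined above.
   let tokens = Lexer::scan_text("# Title with *emphasis*", transform)
      .expect("lexing should succeed");

   for token in &tokens
   {
      println!("{}", token);
   }
}
```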

src/ast.rs (new file)

@@ -0,0 +1,327 @@
use std::collections::VecDeque;
use super::position::Span;
/// A unique identifier for a node in the AST. Internally, this is just an index
/// into the node arrays.
pub type NodeId = usize;
/// The possible orders in which an AST may be stored for traversal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TraversalOrder
{
/// Depth-first search (pre-order) layout.
DFS,
/// Breadth-first search layout.
BFS
}
/// The data associated with a single node in the AST.
#[derive(Debug, Clone)]
pub struct NodeData<T>
{
pub span: Span,
pub data: T
}
/// The mutable AST structure used during parsing. Nodes are created
/// incrementally and linked via parent relationships. Traversal order is not
/// guaranteed until `optimize()` is called.
pub struct Ast<T>
{
nodes: Vec<NodeData<T>>,
parents: Vec<Option<NodeId>>
}
/// An optimized, immutable AST layout produced from `Ast<T>::optimize`.
/// This structure is ideal for traversal, analysis, and code generation.
pub struct OptimizedAst<T>
{
/// Node data in a linear layout (DFS or BFS order).
pub nodes: Vec<NodeData<T>>,
/// Each node's parent, if any.
pub parents: Vec<Option<NodeId>>,
/// The traversal order the nodes are stored in.
pub order: TraversalOrder
}
impl<T> Ast<T>
{
/// Creates a new, empty AST.
pub fn new() -> Self
{
Ast { nodes: Vec::new(),
parents: Vec::new() }
}
/// Returns the parent of a node, if any.
pub fn get_parent(&self, id: NodeId) -> Option<NodeId>
{
self.parents.get(id).copied().flatten()
}
/// Returns a reference to the node data at the given ID, if it exists.
pub fn get(&self, id: NodeId) -> Option<&NodeData<T>>
{
self.nodes.get(id)
}
/// Returns a mutable reference to the node data at the given ID, if it
/// exists.
pub fn get_mut(&mut self, id: NodeId) -> Option<&mut NodeData<T>>
{
self.nodes.get_mut(id)
}
/// Adds a new node to the AST.
///
/// - `data`: The custom payload of the node (usually an enum or struct).
/// - `span`: The source span the node represents.
/// - `parent`: Optional parent NodeId to attach this node to.
///
/// Returns the NodeId of the newly added node.
pub fn add_node(&mut self, data: T, span: Span, parent: Option<NodeId>)
-> NodeId
{
let id = self.nodes.len();
self.nodes.push(NodeData { data, span });
self.parents.push(parent);
id
}
/// Joins another AST into this one, returning a mapping from old node IDs
/// in `other` to new node IDs in `self`.
///
/// Optionally attaches all root nodes of the other AST to a parent node
/// in the current AST.
pub fn join(&mut self, other: Ast<T>, attach_to: Option<NodeId>)
-> Vec<NodeId>
{
let base_id = self.nodes.len();
let mut id_map = Vec::with_capacity(other.nodes.len());
for (i, node) in other.nodes.into_iter().enumerate()
{
self.nodes.push(node);
let new_parent = match other.parents[i]
{
Some(pid) => Some(base_id + pid),
None => attach_to // attach root nodes to given parent if provided
};
self.parents.push(new_parent);
id_map.push(base_id + i);
}
id_map
}
/// Prunes the subtree rooted at `root`, compacting the AST in place.
/// Node IDs will change after this operation.
pub fn prune(&mut self, root: NodeId)
{
let mut to_remove = Vec::new();
collect_descendants(root, &self.parents, &mut to_remove);
to_remove.push(root);
let mut is_removed = vec![false; self.nodes.len()];
for &id in &to_remove
{
is_removed[id] = true;
}
let mut remap = vec![None; self.nodes.len()];
let mut next_insert = 0;
for i in 0..self.nodes.len()
{
if !is_removed[i]
{
if i != next_insert
{
self.nodes.swap(i, next_insert);
self.parents.swap(i, next_insert);
}
remap[i] = Some(next_insert);
next_insert += 1;
}
}
self.nodes.truncate(next_insert);
self.parents.truncate(next_insert);
for parent in self.parents.iter_mut()
{
if let Some(pid) = *parent
{
*parent = remap[pid];
}
}
}
/// Optimizes the AST layout for a specific traversal order (DFS or BFS).
///
/// This consumes the `Ast`, rearranges the internal storage so that
/// iterating over the nodes reflects the chosen traversal order, and
/// returns a new, immutable `OptimizedAst<T>`.
///
/// No need for `T: Clone` anymore, since we will move data instead of
/// cloning it.
pub fn optimize(self, order: TraversalOrder) -> OptimizedAst<T>
{
let ordering = match order
{
TraversalOrder::DFS => dfs_order(&self.parents),
TraversalOrder::BFS => bfs_order(&self.parents)
};
let mut remap = vec![0; self.nodes.len()];
for (new_id, &old_id) in ordering.iter().enumerate()
{
remap[old_id] = new_id;
}
// Wrap nodes in Option to allow taking them by value without cloning
let mut nodes_opt: Vec<Option<NodeData<T>>> =
self.nodes.into_iter().map(Some).collect();
let mut new_nodes = Vec::with_capacity(nodes_opt.len());
let mut new_parents = vec![None; self.parents.len()];
for &old_id in &ordering
{
let new_id = remap[old_id];
let node = nodes_opt[old_id].take()
.expect("Node was already moved out");
let parent = self.parents[old_id].map(|pid| remap[pid]);
new_nodes.push(node);
new_parents[new_id] = parent;
}
OptimizedAst { nodes: new_nodes,
parents: new_parents,
order }
}
}
/// Helper to recursively collect all descendants of a node.
fn collect_descendants(root: NodeId, parents: &[Option<NodeId>],
acc: &mut Vec<NodeId>)
{
for (i, &parent) in parents.iter().enumerate()
{
if parent == Some(root)
{
collect_descendants(i, parents, acc);
acc.push(i);
}
}
}
/// Recursively visits nodes in a depth-first (pre-order) manner starting from
/// `current`, building up the DFS traversal order.
///
/// - `current`: The current node ID being visited.
/// - `parents`: A slice representing the parent relationship for each node
/// (index = child, value = optional parent).
/// - `order`: A mutable vector that will accumulate the DFS traversal order.
/// - `visited`: A mutable slice used to track which nodes have already been
/// visited.
fn visit(current: NodeId, parents: &[Option<NodeId>], order: &mut Vec<NodeId>,
visited: &mut [bool])
{
// Skip this node if it's already been visited
if visited[current]
{
return;
}
// Mark the node as visited to avoid reprocessing it
visited[current] = true;
// Add this node to the traversal order (pre-order)
order.push(current);
// Recursively visit all child nodes (i.e., nodes that list `current` as
// their parent)
for (i, &parent) in parents.iter().enumerate()
{
if parent == Some(current)
{
visit(i, parents, order, visited);
}
}
}
/// Computes the DFS (depth-first, pre-order) ordering of nodes based on the
/// parent table.
///
/// Returns a Vec<NodeId> containing the node IDs in DFS order.
fn dfs_order(parents: &[Option<NodeId>]) -> Vec<NodeId>
{
let mut order = Vec::new();
let mut visited = vec![false; parents.len()];
// Start DFS from all root nodes (nodes with no parent).
for (i, &parent) in parents.iter().enumerate()
{
if parent.is_none()
{
visit(i, parents, &mut order, &mut visited);
}
}
order
}
/// Computes the BFS (breadth-first) ordering of nodes based on the parent
/// table.
///
/// Returns a Vec<NodeId> containing the node IDs in BFS order.
fn bfs_order(parents: &[Option<NodeId>]) -> Vec<NodeId>
{
let mut order = Vec::new();
let mut visited = vec![false; parents.len()];
let mut queue = VecDeque::new();
// Start BFS from all root nodes.
for (i, &parent) in parents.iter().enumerate()
{
if parent.is_none()
{
queue.push_back(i);
}
}
while let Some(current) = queue.pop_front()
{
if visited[current]
{
continue;
}
visited[current] = true;
order.push(current);
for (i, &parent) in parents.iter().enumerate()
{
if parent == Some(current)
{
queue.push_back(i);
}
}
}
order
}
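Usage note (not part of the diff): a small sketch of the `Ast` API above, building a few nodes, pruning a subtree, and flattening the result into DFS order. The `MyNode` payload type is invented for the example.

```rust
use rune::{Ast, Span, TraversalOrder};

// A hypothetical payload type for the example.
enum MyNode
{
   Document,
   Heading,
   Text
}

fn example()
{
   let mut ast = Ast::new();

   // Nodes are appended in creation order and linked through parent IDs.
   let root = ast.add_node(MyNode::Document, Span::default(), None);
   let heading = ast.add_node(MyNode::Heading, Span::default(), Some(root));
   let _text = ast.add_node(MyNode::Text, Span::default(), Some(heading));

   // Drop the heading subtree; remaining node IDs are compacted and remapped.
   ast.prune(heading);

   // Freeze the tree into a cache-friendly, DFS-ordered layout.
   let optimized = ast.optimize(TraversalOrder::DFS);
   assert_eq!(optimized.order, TraversalOrder::DFS);
   assert_eq!(optimized.nodes.len(), 1); // only the Document root remains
}
```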

@@ -1,4 +1,6 @@
-use std::{error::Error, path::PathBuf};
+use std::error::Error;
+use std::path::PathBuf;

 use super::position::Span;

@@ -12,116 +14,114 @@ use super::position::Span;
(This hunk reformats the `LexerError` implementation from K&R to Allman brace style; the content is otherwise unchanged. It is shown below as it reads after the change.)

/// It is designed to provide detailed diagnostics for file-based or
/// in-memory parsing and is compatible with error reporting ecosystems.
#[derive(Debug)]
pub struct LexerError
{
   /// A human-readable error message.
   pub message: String,

   /// The span where the error occurred.
   pub span: Span,

   /// The file that the error occurred in, if known.
   pub file: Option<PathBuf>,

   /// The source snippet related to the error, if known.
   pub snippet: Option<String>,

   /// An optional underlying error that caused this one.
   pub source: Option<Box<dyn Error>>
}

impl LexerError
{
   /// Creates a new `LexerError` with a message, span, and optional context.
   ///
   /// # Parameters
   /// - `message`: A human-readable explanation of the error.
   /// - `span`: The region in the source where the error occurred.
   /// - `file`: An optional path to the file in which the error occurred.
   /// - `snippet`: An optional problematic input string.
   ///
   /// # Returns
   /// A new instance of `LexerError`.
   pub fn new<S, T>(message: S, span: Span, file: Option<T>,
                    snippet: Option<S>)
      -> Self
      where S: Into<String>,
            T: Into<PathBuf>
   {
      LexerError { message: message.into(),
                   span,
                   file: file.map(Into::into),
                   snippet: snippet.map(Into::into),
                   source: None }
   }

   /// Creates a `LexerError` from only a message and span.
   ///
   /// This is useful when file or snippet context is not available.
   pub fn from_message<S>(message: S, span: Span) -> Self
      where S: Into<String>
   {
      Self::new(message, span, None::<PathBuf>, None::<S>)
   }

   /// Attaches a snippet of the offending source code.
   ///
   /// This is helpful for diagnostics and tooling output.
   pub fn with_snippet<S>(mut self, snippet: S) -> Self
      where S: Into<String>
   {
      self.snippet = Some(snippet.into());
      self
   }

   /// Attaches the path of the file in which the error occurred.
   pub fn with_file<T>(mut self, file: T) -> Self
      where T: Into<PathBuf>
   {
      self.file = Some(file.into());
      self
   }

   /// Wraps a source error that caused this `LexerError`.
   ///
   /// This allows you to chain errors for more detailed diagnostics.
   pub fn with_source<E>(mut self, err: E) -> Self
      where E: Error + 'static
   {
      self.source = Some(Box::new(err));
      self
   }
}

impl std::fmt::Display for LexerError
{
   fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
   {
      write!(f, "Lexer error at {}", self.span)?;

      if let Some(file) = &self.file
      {
         write!(f, " in file `{}`", file.display())?;
      }

      write!(f, ": {}", self.message)?;

      if let Some(snippet) = &self.snippet
      {
         write!(f, "\n --> Snippet: `{}`", snippet)?;
      }

      Ok(())
   }
}

impl Error for LexerError
{
   /// Returns the underlying cause of this error, if any.
   fn source(&self) -> Option<&(dyn Error + 'static)>
   {
      self.source.as_ref().map(|e| e.as_ref())
   }
}
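Usage note (not part of the diff): a sketch of how the builder-style constructors compose; the message, file, and snippet values are made up.

```rust
use rune::{LexerError, Span};

fn report() -> Result<(), LexerError>
{
   // Attach optional context fluently with the builder-style methods.
   let err = LexerError::from_message("unexpected character", Span::default())
      .with_file("notes.md")
      .with_snippet("let x = @;");

   // Display prints the span, file, and snippet attached above.
   println!("{}", err);

   Err(err)
}
```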

src/iter.rs (new file)

@@ -0,0 +1,209 @@
//! An iterator adapter for arbitrary lookahead functionality.
//!
//! This module provides [`Lookahead`], an adapter for any iterator that allows
//! you to peek ahead by any number of elements, without consuming them.
//!
//! ## Example
//! ```
//! use rune::LookaheadExt;
//!
//! let mut it = vec![10, 20, 30].into_iter().lookahead();
//!
//! assert_eq!(it.peek(0), Some(&10));
//! assert_eq!(it.peek(1), Some(&20));
//! assert_eq!(it.next(), Some(10));
//! assert_eq!(it.peek(0), Some(&20));
//! ```
use std::collections::VecDeque;
use std::fmt;
use std::iter::{Fuse, FusedIterator};
/// An iterator adapter that allows arbitrary lookahead peeking.
///
/// This struct wraps an iterator and buffers items so that any future
/// item can be accessed by index without consuming them. Similar to
/// [`std::iter::Peekable`], but supports peeking any number of steps ahead.
pub struct Lookahead<I>
where I: Iterator
{
iter: Fuse<I>,
buffer: VecDeque<I::Item>
}
impl<I> Lookahead<I> where I: Iterator
{
/// Creates a new [`Lookahead`] from the given iterator.
///
/// This constructor is typically used indirectly via the
/// [`LookaheadExt::lookahead()`] method or [`lookahead()`] free function.
#[must_use]
pub fn new(iter: I) -> Self
{
Lookahead { iter: iter.fuse(),
buffer: VecDeque::new() }
}
/// Returns a reference to the `n`th upcoming item, if it exists.
///
/// `peek(0)` is the same as peeking at the next item.
///
/// This does **not consume** any items from the iterator.
///
/// # Examples
/// ```
/// use rune::LookaheadExt;
///
/// let mut it = vec![1, 2, 3].into_iter().lookahead();
///
/// assert_eq!(it.peek(1), Some(&2));
/// assert_eq!(it.next(), Some(1));
/// ```
pub fn peek(&mut self, n: usize) -> Option<&I::Item>
{
while self.buffer.len() <= n
{
if let Some(item) = self.iter.next()
{
self.buffer.push_back(item);
}
else
{
break;
}
}
self.buffer.get(n)
}
/// Returns a mutable reference to the `n`th upcoming item, if it exists.
///
/// This allows in-place modification of peeked items before consumption.
///
/// # Examples
/// ```
/// use rune::LookaheadExt;
///
/// let mut it = vec![1, 2, 3].into_iter().lookahead();
/// if let Some(x) = it.peek_mut(1)
/// {
/// *x *= 10;
/// }
/// assert_eq!(it.next(), Some(1));
/// assert_eq!(it.next(), Some(20));
/// ```
pub fn peek_mut(&mut self, n: usize) -> Option<&mut I::Item>
{
while self.buffer.len() <= n
{
if let Some(item) = self.iter.next()
{
self.buffer.push_back(item);
}
else
{
break;
}
}
self.buffer.get_mut(n)
}
}
impl<I> Iterator for Lookahead<I> where I: Iterator
{
type Item = I::Item;
/// Retrieves the next item, consuming it.
///
/// If any items were previously peeked and buffered, they are returned
/// first before accessing the underlying iterator.
fn next(&mut self) -> Option<Self::Item>
{
if let Some(front) = self.buffer.pop_front()
{
Some(front)
}
else
{
self.iter.next()
}
}
/// Provides a size hint accounting for both buffered and remaining elements.
fn size_hint(&self) -> (usize, Option<usize>)
{
let (low, high) = self.iter.size_hint();
let buffered = self.buffer.len();
(low.saturating_add(buffered), high.and_then(|h| h.checked_add(buffered)))
}
}
impl<I> Clone for Lookahead<I>
where I: Iterator + Clone,
I::Item: Clone
{
fn clone(&self) -> Self
{
Lookahead { iter: self.iter.clone(),
buffer: self.buffer.clone() }
}
}
impl<I> fmt::Debug for Lookahead<I>
where I: Iterator + fmt::Debug,
I::Item: fmt::Debug
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
f.debug_struct("Lookahead")
.field("iter", &self.iter)
.field("buffer", &self.buffer)
.finish()
}
}
impl<I> FusedIterator for Lookahead<I> where I: Iterator + FusedIterator {}
/// Extension trait to provide `.lookahead()` on all iterators.
///
/// This lets you easily call `.lookahead()` on any iterator to
/// create a [`Lookahead`] instance.
pub trait LookaheadExt: Iterator + Sized
{
/// Wraps the iterator in a [`Lookahead`] adapter.
fn lookahead(self) -> Lookahead<Self>;
}
impl<I: Iterator> LookaheadExt for I
{
fn lookahead(self) -> Lookahead<Self>
{
Lookahead::new(self)
}
}
/// Creates a [`Lookahead`] from any iterable.
///
/// This is a convenience function for use in functional-style code or
/// when not using the extension trait.
///
/// # Example
/// ```
/// use rune::lookahead;
///
/// let mut it = lookahead(vec![1, 2, 3]);
///
/// assert_eq!(it.peek(2), Some(&3));
/// ```
pub fn lookahead<I>(iterable: I) -> Lookahead<I::IntoIter>
where I: IntoIterator
{
Lookahead::new(iterable.into_iter())
}
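Usage note (not part of the diff): the two-token lookahead mentioned in the commit message might look like the following sketch; the token values are invented for illustration.

```rust
use rune::LookaheadExt;

fn main()
{
   // A parser deciding between "**" (strong) and "*" (emphasis) only needs to
   // peek one or two items ahead without consuming anything.
   let mut tokens = vec!["*", "*", "bold"].into_iter().lookahead();

   let first_is_star = tokens.peek(0).map(|t| *t == "*").unwrap_or(false);
   let second_is_star = tokens.peek(1).map(|t| *t == "*").unwrap_or(false);
   assert!(first_is_star && second_is_star);

   // Peeking consumed nothing; the parser still advances explicitly.
   assert_eq!(tokens.next(), Some("*"));
   assert_eq!(tokens.next(), Some("*"));
   assert_eq!(tokens.next(), Some("bold"));
}
```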

(In this file, the doc comments were rewritten and expanded, explanatory comments were added, the "Lexigraphical Analysis" typo in the read-error message became "Lexical Analysis", and scan_text gained the trailing-newline fix. The hunks below are shown as they read after the change.)

@@ -7,24 +7,25 @@ use super::token::{TokenStream, TokenType};
/// The size of data chunks to read from a file. This is an arbitrary choice,
/// set to 1MB.
const BUFFER_SIZE: usize = 1024 * 1024;

/// The `Lexer` struct is responsible for performing lexical analysis
/// (tokenization) on text.
///
/// It processes input from a file or string character-by-character and
/// generates a stream of tokens, such as text, numbers, whitespace, symbols,
/// and newlines. These tokens are accumulated into a `TokenStream`, which is a
/// flat, cache-friendly data structure designed for efficient iteration.
///
/// After the base tokens are generated, the `Lexer` allows for transformation
/// of these tokens into richer, domain-specific types via a user-provided
/// `transform` function. This transformation can be used to convert base tokens
/// into specific elements of a Markdown syntax tree, custom DSL tokens, or any
/// other custom format you need.
///
/// # Example
///

@@ -38,32 +39,51 @@ const BUFFER_SIZE: usize = 1024 * 1024;
///
/// let tokens = Lexer::scan_text("Runes += 42", transform).unwrap();
///
/// for token in &tokens
/// {
///    println!("{}", token);
/// }
/// ```
///
/// # Design Considerations
/// - Utilizes a flat `TokenStream` to improve performance and reduce heap
///   overhead.
/// - Consolidates contiguous characters into compound tokens (e.g., multi-digit
///   numbers).
/// - Extensible via the `transform` function, enabling the creation of
///   higher-level constructs, like Markdown elements or syntax trees for a
///   custom DSL.
///
/// # Error Handling
/// The lexer will return a `LexerError` if the input file cannot be opened or
/// read. Errors include issues such as missing files, read failures, or invalid
/// input formats.
pub enum Lexer {}

impl Lexer
{
   /// Scans a file and generates a vector of transformed tokens based on the
   /// provided `transform` function.
   ///
   /// This method opens a file from the given `path`, reads the file line by
   /// line, and converts the input into a stream of tokens. The tokens are
   /// then passed to the `transform` function, which allows users to map
   /// base tokens into domain-specific types.
   ///
   /// # Parameters
   /// - `path`: A path to the file to be lexically analyzed.
   /// - `transform`: A function that takes a `TokenStream<TokenType>` and
   ///   transforms it into a `TokenStream<T>` where `T` is a domain-specific
   ///   type.
   ///
   /// # Returns
   /// A `Result<TokenStream<T>, LexerError>` where `T` is the transformed token
   /// type, or an error.
   ///
   /// # Errors
   /// Returns a `LexerError` if the file cannot be opened or read.
   pub fn scan_file<P, F, T>(path: P, transform: F)
      -> Result<TokenStream<T>, LexerError>
      where P: AsRef<std::path::Path>,

@@ -82,6 +102,7 @@ impl Lexer
      let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);

      // Read the file line by line.
      for line in reader.lines()
      {
         match line

@@ -93,7 +114,7 @@ impl Lexer
            Err(_) =>
            {
               return Err(LexerError::new("Unable to read line during \
                                           Lexical Analysis.",
                                          Span::default(),
                                          Some(path.as_ref()
                                                   .to_string_lossy()

@@ -102,10 +123,10 @@ impl Lexer
            }
         }

         // Add the newline token after each line.
         stream.push("\n".to_string(),
                     TokenType::Newline,
                     Span::with_single(cursor));

         cursor.line += 1;
         cursor.column = 0;
      }

@@ -113,7 +134,22 @@ impl Lexer
      Ok(transform(&stream))
   }

   /// Scans a full in-memory string and produces transformed tokens.
   ///
   /// This method tokenizes the input string `text` and returns the transformed
   /// tokens using the provided `transform` function. It's a convenient way
   /// to perform lexical analysis on in-memory strings without needing to
   /// read from a file.
   ///
   /// # Parameters
   /// - `text`: A string slice representing the in-memory input text to
   ///   analyze.
   /// - `transform`: A function that transforms the base tokens into
   ///   domain-specific types.
   ///
   /// # Returns
   /// A `Result<TokenStream<T>, LexerError>` where `T` is the transformed token
   /// type, or an error.
   pub fn scan_text<F, T>(text: &str, transform: F)
      -> Result<TokenStream<T>, LexerError>
      where F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>

@@ -121,30 +157,51 @@ impl Lexer
      let mut cursor = Position::default();
      let mut stream = TokenStream::new();

      // Process each line in the input string.
      for line in text.lines()
      {
         Self::scan(line, &mut stream, &mut cursor);

         // Add the newline token after each line.
         stream.push("\n".to_string(),
                     TokenType::Newline,
                     Span::with_single(cursor));

         cursor.line += 1;
         cursor.column = 0;
      }

      // Remove the last newline character if the text did not end with a
      // newline.
      if !text.ends_with('\n')
      {
         stream.pop();
      }

      Ok(transform(&stream))
   }

   /// Internal method that scans a single line of text into tokens.
   ///
   /// This method processes each character of a line and generates the
   /// corresponding token. It handles cases like numeric tokens, text
   /// tokens, symbols, and whitespace.
   ///
   /// # Parameters
   /// - `line`: A line of text to be lexically analyzed.
   /// - `stream`: A mutable reference to the token stream where the generated
   ///   tokens will be pushed.
   /// - `cursor`: A mutable reference to the cursor position, which tracks the
   ///   current position in the input.
   fn scan(line: &str, stream: &mut TokenStream<TokenType>,
           cursor: &mut Position)
   {
      for c in line.chars()
      {
         // Get the token type based on the character.
         let variant = get_token_type(c);
         let last = stream.len().saturating_sub(1);

         // Handle token merging for contiguous tokens like numbers or text.
         if !stream.is_empty() &&
            variant == stream.variants[last] &&
            (variant == TokenType::Numeric || variant == TokenType::Text)

@@ -154,6 +211,7 @@ impl Lexer
         }
         else
         {
            // Add a new token to the stream.
            stream.push(c.to_string(), variant, Span::with_single(*cursor));
         }

@@ -164,6 +222,18 @@ impl Lexer

/// Determines the type of a token based on the current character.
///
/// This helper function is responsible for identifying whether the current
/// character is part of a known token type such as numeric, text, whitespace,
/// or symbol.
///
/// # Parameters
/// - `curr_char`: The current character to analyze.
///
/// # Returns
/// A `TokenType` corresponding to the character type (e.g., `Numeric`, `Text`,
/// `Whitespace`, etc.).
fn get_token_type(curr_char: char) -> TokenType
{
   match curr_char
@@ -7,14 +7,18 @@
 pub mod library;

+mod ast;
 mod error;
+mod iter;
 mod lexer;
 mod position;
 mod token;

+pub use crate::ast::*;
 pub use crate::error::*;
+pub use crate::iter::*;
 pub use crate::lexer::*;
 pub use crate::position::*;
 pub use crate::token::*;

tests/iter.rs (new file)

@@ -0,0 +1,43 @@
use rune::*;
#[test]
fn peek_works()
{
let mut it = vec![1, 2, 3].into_iter().lookahead();
assert_eq!(it.peek(0), Some(&1));
assert_eq!(it.peek(1), Some(&2));
assert_eq!(it.peek(2), Some(&3));
assert_eq!(it.peek(3), None);
}
#[test]
fn peek_mut_modifies_item()
{
let mut it = vec![10, 20, 30].into_iter().lookahead();
if let Some(x) = it.peek_mut(1)
{
*x += 100;
}
assert_eq!(it.next(), Some(10));
assert_eq!(it.next(), Some(120));
}
#[test]
fn iterates_correctly()
{
let mut it = vec![1, 2, 3].into_iter().lookahead();
assert_eq!(it.next(), Some(1));
assert_eq!(it.next(), Some(2));
assert_eq!(it.next(), Some(3));
assert_eq!(it.next(), None);
}
#[test]
fn size_hint_accounts_for_buffer()
{
let mut it = vec![1, 2, 3, 4].into_iter().lookahead();
it.peek(2);
let (low, high) = it.size_hint();
assert!(low >= 4);
assert_eq!(high, Some(4));
}

@@ -53,45 +53,47 @@ fn cleanup_temp_file(path: &PathBuf)
 #[test]
-fn test_basic_lexing()
+fn basic_lexing()
 {
    let tokens =
       Lexer::scan_text("magic runes", dummy_transform).expect("Lexer should \
                                                                succeed");
-   let tokens = tokens.into_iter().map(|t| { (*t.variant, String::from(t.lexeme))}).collect::<Vec<_>>();
+   let tokens = tokens.into_iter()
+                      .map(|t| (*t.variant, String::from(t.lexeme)))
+                      .collect::<Vec<_>>();

    let expected = vec![(TokenType::Text, "magic".to_string()),
                        (TokenType::Whitespace, " ".to_string()),
-                       (TokenType::Text, "runes".to_string()),
-                       (TokenType::Newline, "\n".to_string()),];
+                       (TokenType::Text, "runes".to_string()),];

    assert_eq!(tokens, expected);
 }

 #[test]
-fn test_symbols_and_numbers()
+fn symbols_and_numbers()
 {
    let tokens =
       Lexer::scan_text("13 + 37", dummy_transform).expect("Lexer should \
                                                            succeed");
-   let tokens = tokens.into_iter().map(|t| { (*t.variant, String::from(t.lexeme))}).collect::<Vec<_>>();
+   let tokens = tokens.into_iter()
+                      .map(|t| (*t.variant, String::from(t.lexeme)))
+                      .collect::<Vec<_>>();

    let expected = vec![(TokenType::Numeric, "13".into()),
                        (TokenType::Whitespace, " ".into()),
                        (TokenType::Symbol, "+".into()),
                        (TokenType::Whitespace, " ".into()),
-                       (TokenType::Numeric, "37".into()),
-                       (TokenType::Newline, "\n".into()),];
+                       (TokenType::Numeric, "37".into()),];

    assert_eq!(tokens, expected);
 }

 #[test]
-fn test_lexer_with_cases()
+fn lexer_with_cases()
 {
    let cases = vec![TestCase { name: "simple_words",
                                input: "magic rune",

@@ -129,16 +131,16 @@ fn test_lexer_with_cases()
                                                 on case '{}'",
                                                case.name));

-      let result = result.into_iter().map(|t| { (*t.variant, String::from(t.lexeme))}).collect::<Vec<_>>();
+      let result = result.into_iter()
+                         .map(|t| (*t.variant, String::from(t.lexeme)))
+                         .collect::<Vec<_>>();

       let expected = case.expected
                          .iter()
                          .map(|(ty, s)| (*ty, s.to_string()))
                          .collect::<Vec<_>>();

-      assert_eq!(result, expected,
-                 "Mismatch in test case '{}'",
-                 case.name);
+      assert_eq!(result, expected, "Mismatch in test case '{}'", case.name);

       cleanup_temp_file(&path);
    }