Compare commits

cd50b53be5 ... 693ff20224

5 commits:

- 693ff20224
- 34a579332d
- cb882ceb84
- e604bf172b
- f5780f50c2

Cargo.lock (generated, 2 changes)

@@ -4,4 +4,4 @@ version = 4
 
 [[package]]
 name = "rune"
-version = "0.2.0"
+version = "0.3.0"

Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "rune"
-version = "0.2.0"
+version = "0.3.0"
 edition = "2021"
 description = "A lexical analysis library."
 repository = "/myrddin/rune"

README.md

@@ -31,5 +31,5 @@ Then add this to your Cargo.toml file.
 
 ```toml
 [dependencies]
-rune = { version = "0.2.0", registry = "cybermages" }
+rune = { version = "0.3.0", registry = "cybermages" }
 ```

@@ -5,7 +5,8 @@ use rune::{Lexer, Span, TokenStream, TokenType};
 
 
 #[derive(Debug, Clone, PartialEq, Eq)]
-pub enum MarkdownTokenType {
+pub enum MarkdownTokenType
+{
     Heading(u8),
     EmphasisStart,
     EmphasisEnd,
@@ -16,13 +17,16 @@ pub enum MarkdownTokenType {
     Symbol,
     Whitespace,
     Newline,
-    Unknown,
+    Unknown
 }
 
 
-impl std::fmt::Display for MarkdownTokenType {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
+impl std::fmt::Display for MarkdownTokenType
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
+    {
+        match self
+        {
             MarkdownTokenType::Heading(level) => write!(f, "Heading({})", level),
             MarkdownTokenType::EmphasisStart => write!(f, "EmphasisStart"),
             MarkdownTokenType::EmphasisEnd => write!(f, "EmphasisEnd"),
@@ -33,7 +37,7 @@ impl std::fmt::Display for MarkdownTokenType {
             MarkdownTokenType::Symbol => write!(f, "Symbol"),
             MarkdownTokenType::Whitespace => write!(f, "Whitespace"),
             MarkdownTokenType::Newline => write!(f, "Newline"),
-            MarkdownTokenType::Unknown => write!(f, "Unknown"),
+            MarkdownTokenType::Unknown => write!(f, "Unknown")
         }
     }
 }
@@ -41,26 +45,35 @@ impl std::fmt::Display for MarkdownTokenType {
 
 
 // Define how you want to interpret base tokens
-pub fn transform(input: &TokenStream<TokenType>) -> TokenStream<MarkdownTokenType> {
+pub fn transform(input: &TokenStream<TokenType>)
+    -> TokenStream<MarkdownTokenType>
+{
     let mut output = TokenStream::new();
 
     let mut i = 0;
-    while i < input.len() {
+    while i < input.len()
+    {
         let token = input.get(i).unwrap(); // safe due to bounds check above
 
-        match token.variant {
-            TokenType::Symbol if token.lexeme == "#" => {
+        match token.variant
+        {
+            TokenType::Symbol if token.lexeme == "#" =>
+            {
                 // Count consecutive #s for heading level
                 let mut level = 1;
                 let mut span = token.span.clone();
 
-                while i + 1 < input.len() {
+                while i + 1 < input.len()
+                {
                     let next = input.get(i + 1).unwrap();
-                    if *next.variant == TokenType::Symbol && next.lexeme == "#" {
+                    if *next.variant == TokenType::Symbol && next.lexeme == "#"
+                    {
                         level += 1;
                         span.end = next.span.end;
                         i += 1;
-                    } else {
+                    }
+                    else
+                    {
                         break;
                     }
                 }
@@ -70,58 +83,71 @@ pub fn transform(input: &TokenStream<TokenType>) -> TokenStream<MarkdownTokenType>
                             span);
             }
 
-            TokenType::Symbol if token.lexeme == "*" => {
+            TokenType::Symbol if token.lexeme == "*" =>
+            {
                 // Look ahead to see if it's strong (**) or emphasis (*)
-                if i + 1 < input.len() {
+                if i + 1 < input.len()
+                {
                     let next = input.get(i + 1).unwrap();
-                    if *next.variant == TokenType::Symbol && next.lexeme == "*" {
+                    if *next.variant == TokenType::Symbol && next.lexeme == "*"
+                    {
                         output.push("**".to_string(),
                                     MarkdownTokenType::StrongStart,
                                     Span::merge(*token.span, *next.span));
                         i += 1; // skip the second '*'
-                    } else {
+                    }
+                    else
+                    {
                         output.push("*".to_string(),
                                     MarkdownTokenType::EmphasisStart,
                                     token.span.clone());
                     }
-                } else {
+                }
+                else
+                {
                     output.push("*".to_string(),
                                 MarkdownTokenType::EmphasisStart,
                                 token.span.clone());
                 }
             }
 
-            TokenType::Symbol if token.lexeme == "`" => {
+            TokenType::Symbol if token.lexeme == "`" =>
+            {
                 output.push(token.lexeme.to_string(),
                             MarkdownTokenType::CodeSpan,
                             token.span.clone());
             }
 
-            TokenType::Text => {
+            TokenType::Text =>
+            {
                 output.push(token.lexeme.to_string(),
                             MarkdownTokenType::Text,
                             token.span.clone());
             }
 
-            TokenType::Symbol => {
+            TokenType::Symbol =>
+            {
                 output.push(token.lexeme.to_string(),
                             MarkdownTokenType::Symbol,
                             token.span.clone());
             }
 
-            TokenType::Whitespace => {
+            TokenType::Whitespace =>
+            {
                 output.push(token.lexeme.to_string(),
                             MarkdownTokenType::Whitespace,
                             token.span.clone());
             }
 
-            TokenType::Newline => {
+            TokenType::Newline =>
+            {
                 output.push(token.lexeme.to_string(),
                             MarkdownTokenType::Newline,
                             token.span.clone());
             }
 
-            _ => {
+            _ =>
+            {
                 output.push(token.lexeme.to_string(),
                             MarkdownTokenType::Unknown,
                             token.span.clone());
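
The diff above only restyles this Markdown example: braces move to their own lines (Allman style), trailing commas on final enum variants and match arms are dropped, and the `transform` signature wraps; the token logic is untouched. For orientation, here is a minimal sketch of how such a transform is driven, based on the `Lexer::scan_text` API documented in `src/lexer.rs` below. It assumes the `transform` function and `MarkdownTokenType` from the example are in scope, and the input string is illustrative only.

```rust
use rune::Lexer;

// A sketch only: `transform` and `MarkdownTokenType` come from the example
// above. `scan_text` produces base tokens (text, symbols, whitespace, ...),
// then hands the whole stream to `transform`, which folds runs of '#' into
// Heading(level) tokens and pairs of '*' into StrongStart tokens.
fn main()
{
    let tokens = Lexer::scan_text("## magic *runes*", transform)
        .expect("lexing should succeed");

    for token in &tokens
    {
        println!("{}", token);
    }
}
```
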
src/ast.rs (new file, 327 lines)

@@ -0,0 +1,327 @@
+use std::collections::VecDeque;
+
+use super::position::Span;
+
+
+
+/// A unique identifier for a node in the AST. Internally, this is just an index
+/// into the node arrays.
+pub type NodeId = usize;
+
+
+
+/// The possible orders in which an AST may be stored for traversal.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TraversalOrder
+{
+    /// Depth-first search (pre-order) layout.
+    DFS,
+    /// Breadth-first search layout.
+    BFS
+}
+
+
+
+/// The data associated with a single node in the AST.
+#[derive(Debug, Clone)]
+pub struct NodeData<T>
+{
+    pub span: Span,
+    pub data: T
+}
+
+/// The mutable AST structure used during parsing. Nodes are created
+/// incrementally and linked via parent relationships. Traversal order is not
+/// guaranteed until `optimize()` is called.
+pub struct Ast<T>
+{
+    nodes: Vec<NodeData<T>>,
+    parents: Vec<Option<NodeId>>
+}
+
+/// An optimized, immutable AST layout produced from `Ast<T>::optimize`.
+/// This structure is ideal for traversal, analysis, and code generation.
+pub struct OptimizedAst<T>
+{
+    /// Node data in a linear layout (DFS or BFS order).
+    pub nodes: Vec<NodeData<T>>,
+    /// Each node’s parent, if any.
+    pub parents: Vec<Option<NodeId>>,
+    /// The traversal order the nodes are stored in.
+    pub order: TraversalOrder
+}
+
+
+
+impl<T> Ast<T>
+{
+    /// Creates a new, empty AST.
+    pub fn new() -> Self
+    {
+        Ast { nodes: Vec::new(),
+              parents: Vec::new() }
+    }
+
+    /// Returns the parent of a node, if any.
+    pub fn get_parent(&self, id: NodeId) -> Option<NodeId>
+    {
+        self.parents.get(id).copied().flatten()
+    }
+
+    /// Returns a reference to the node data at the given ID, if it exists.
+    pub fn get(&self, id: NodeId) -> Option<&NodeData<T>>
+    {
+        self.nodes.get(id)
+    }
+
+    /// Returns a mutable reference to the node data at the given ID, if it
+    /// exists.
+    pub fn get_mut(&mut self, id: NodeId) -> Option<&mut NodeData<T>>
+    {
+        self.nodes.get_mut(id)
+    }
+
+    /// Adds a new node to the AST.
+    ///
+    /// - `data`: The custom payload of the node (usually an enum or struct).
+    /// - `span`: The source span the node represents.
+    /// - `parent`: Optional parent NodeId to attach this node to.
+    ///
+    /// Returns the NodeId of the newly added node.
+    pub fn add_node(&mut self, data: T, span: Span, parent: Option<NodeId>)
+        -> NodeId
+    {
+        let id = self.nodes.len();
+        self.nodes.push(NodeData { data, span });
+        self.parents.push(parent);
+        id
+    }
+
+    /// Joins another AST into this one, returning a mapping from old node IDs
+    /// in `other` to new node IDs in `self`.
+    ///
+    /// Optionally attaches all root nodes of the other AST to a parent node
+    /// in the current AST.
+    pub fn join(&mut self, other: Ast<T>, attach_to: Option<NodeId>)
+        -> Vec<NodeId>
+    {
+        let base_id = self.nodes.len();
+        let mut id_map = Vec::with_capacity(other.nodes.len());
+
+        for (i, node) in other.nodes.into_iter().enumerate()
+        {
+            self.nodes.push(node);
+            let new_parent = match other.parents[i]
+            {
+                Some(pid) => Some(base_id + pid),
+                None => attach_to // attach root nodes to given parent if provided
+            };
+            self.parents.push(new_parent);
+            id_map.push(base_id + i);
+        }
+
+        id_map
+    }
+
+    /// Prunes the subtree rooted at `root`, compacting the AST in place.
+    /// Node IDs will change after this operation.
+    pub fn prune(&mut self, root: NodeId)
+    {
+        let mut to_remove = Vec::new();
+        collect_descendants(root, &self.parents, &mut to_remove);
+        to_remove.push(root);
+
+        let mut is_removed = vec![false; self.nodes.len()];
+        for &id in &to_remove
+        {
+            is_removed[id] = true;
+        }
+
+        let mut remap = vec![None; self.nodes.len()];
+        let mut next_insert = 0;
+
+        for i in 0..self.nodes.len()
+        {
+            if !is_removed[i]
+            {
+                if i != next_insert
+                {
+                    self.nodes.swap(i, next_insert);
+                    self.parents.swap(i, next_insert);
+                }
+                remap[i] = Some(next_insert);
+                next_insert += 1;
+            }
+        }
+
+        self.nodes.truncate(next_insert);
+        self.parents.truncate(next_insert);
+
+        for parent in self.parents.iter_mut()
+        {
+            if let Some(pid) = *parent
+            {
+                *parent = remap[pid];
+            }
+        }
+    }
+
+    /// Optimizes the AST layout for a specific traversal order (DFS or BFS).
+    ///
+    /// This consumes the `Ast`, rearranges the internal storage so that
+    /// iterating over the nodes reflects the chosen traversal order, and
+    /// returns a new, immutable `OptimizedAst<T>`.
+    ///
+    /// No need for `T: Clone` anymore, since we will move data instead of
+    /// cloning it.
+    pub fn optimize(self, order: TraversalOrder) -> OptimizedAst<T>
+    {
+        let ordering = match order
+        {
+            TraversalOrder::DFS => dfs_order(&self.parents),
+            TraversalOrder::BFS => bfs_order(&self.parents)
+        };
+
+        let mut remap = vec![0; self.nodes.len()];
+        for (new_id, &old_id) in ordering.iter().enumerate()
+        {
+            remap[old_id] = new_id;
+        }
+
+        // Wrap nodes in Option to allow taking them by value without cloning
+        let mut nodes_opt: Vec<Option<NodeData<T>>> =
+            self.nodes.into_iter().map(Some).collect();
+
+        let mut new_nodes = Vec::with_capacity(nodes_opt.len());
+        let mut new_parents = vec![None; self.parents.len()];
+
+        for &old_id in &ordering
+        {
+            let new_id = remap[old_id];
+            let node = nodes_opt[old_id].take()
+                                        .expect("Node was already moved out");
+
+            let parent = self.parents[old_id].map(|pid| remap[pid]);
+
+            new_nodes.push(node);
+            new_parents[new_id] = parent;
+        }
+
+        OptimizedAst { nodes: new_nodes,
+                       parents: new_parents,
+                       order }
+    }
+}
+
+
+
+/// Helper to recursively collect all descendants of a node.
+fn collect_descendants(root: NodeId, parents: &[Option<NodeId>],
+                       acc: &mut Vec<NodeId>)
+{
+    for (i, &parent) in parents.iter().enumerate()
+    {
+        if parent == Some(root)
+        {
+            collect_descendants(i, parents, acc);
+            acc.push(i);
+        }
+    }
+}
+
+/// Recursively visits nodes in a depth-first (pre-order) manner starting from
+/// `current`, building up the DFS traversal order.
+///
+/// - `current`: The current node ID being visited.
+/// - `parents`: A slice representing the parent relationship for each node
+///   (index = child, value = optional parent).
+/// - `order`: A mutable vector that will accumulate the DFS traversal order.
+/// - `visited`: A mutable slice used to track which nodes have already been
+///   visited.
+fn visit(current: NodeId, parents: &[Option<NodeId>], order: &mut Vec<NodeId>,
+         visited: &mut [bool])
+{
+    // Skip this node if it's already been visited
+    if visited[current]
+    {
+        return;
+    }
+
+    // Mark the node as visited to avoid reprocessing it
+    visited[current] = true;
+
+    // Add this node to the traversal order (pre-order)
+    order.push(current);
+
+    // Recursively visit all child nodes (i.e., nodes that list `current` as
+    // their parent)
+    for (i, &parent) in parents.iter().enumerate()
+    {
+        if parent == Some(current)
+        {
+            visit(i, parents, order, visited);
+        }
+    }
+}
+
+/// Computes the DFS (depth-first, pre-order) ordering of nodes based on the
+/// parent table.
+///
+/// Returns a Vec<NodeId> containing the node IDs in DFS order.
+fn dfs_order(parents: &[Option<NodeId>]) -> Vec<NodeId>
+{
+    let mut order = Vec::new();
+    let mut visited = vec![false; parents.len()];
+
+    // Start DFS from all root nodes (nodes with no parent).
+    for (i, &parent) in parents.iter().enumerate()
+    {
+        if parent.is_none()
+        {
+            visit(i, parents, &mut order, &mut visited);
+        }
+    }
+
+    order
+}
+
+/// Computes the BFS (breadth-first) ordering of nodes based on the parent
+/// table.
+///
+/// Returns a Vec<NodeId> containing the node IDs in BFS order.
+fn bfs_order(parents: &[Option<NodeId>]) -> Vec<NodeId>
+{
+    let mut order = Vec::new();
+    let mut visited = vec![false; parents.len()];
+    let mut queue = VecDeque::new();
+
+    // Start BFS from all root nodes.
+    for (i, &parent) in parents.iter().enumerate()
+    {
+        if parent.is_none()
+        {
+            queue.push_back(i);
+        }
+    }
+
+    while let Some(current) = queue.pop_front()
+    {
+        if visited[current]
+        {
+            continue;
+        }
+
+        visited[current] = true;
+        order.push(current);
+
+        for (i, &parent) in parents.iter().enumerate()
+        {
+            if parent == Some(current)
+            {
+                queue.push_back(i);
+            }
+        }
+    }
+
+    order
+}
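
The new `src/ast.rs` keeps the tree in two parallel arrays indexed by `NodeId`: `nodes` holds each payload and span, `parents` holds each node's optional parent, and children are found by scanning for entries that point at a given parent. A minimal usage sketch of the build/prune/optimize lifecycle follows; the `&str` payload and default spans are illustrative assumptions, not anything this patch ships.

```rust
use rune::{Ast, Span, TraversalOrder};

fn main()
{
    let mut ast: Ast<&str> = Ast::new();

    // Build a small tree: root -> (a -> leaf, b).
    let root = ast.add_node("root", Span::default(), None);
    let a = ast.add_node("a", Span::default(), Some(root));
    let _leaf = ast.add_node("leaf", Span::default(), Some(a));
    let _b = ast.add_node("b", Span::default(), Some(root));

    // Remove the `a` subtree; the arrays are compacted in place, so any
    // previously held NodeIds must be treated as invalid afterwards.
    ast.prune(a);

    // Freeze into a DFS-ordered, immutable layout for traversal.
    let optimized = ast.optimize(TraversalOrder::DFS);
    for node in &optimized.nodes
    {
        println!("{}", node.data);
    }
}
```

Note that `optimize` consumes the `Ast`, which is what lets it move node data into the new layout without requiring `T: Clone`.
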
src/error.rs (62 lines changed)

@@ -1,4 +1,6 @@
-use std::{error::Error, path::PathBuf};
+use std::error::Error;
+use std::path::PathBuf;
 
 use super::position::Span;
 
+
@@ -12,7 +14,8 @@ use super::position::Span;
 /// It is designed to provide detailed diagnostics for file-based or
 /// in-memory parsing and is compatible with error reporting ecosystems.
 #[derive(Debug)]
-pub struct LexerError {
+pub struct LexerError
+{
     /// A human-readable error message.
     pub message: String,
 
@@ -26,10 +29,11 @@ pub struct LexerError {
     pub snippet: Option<String>,
 
     /// An optional underlying error that caused this one.
-    pub source: Option<Box<dyn Error>>,
+    pub source: Option<Box<dyn Error>>
 }
 
-impl LexerError {
+impl LexerError
+{
     /// Creates a new `LexerError` with a message, span, and optional context.
     ///
     /// # Parameters
@@ -40,31 +44,24 @@ impl LexerError {
     ///
     /// # Returns
     /// A new instance of `LexerError`.
-    pub fn new<S, T>(
-        message: S,
-        span: Span,
-        file: Option<T>,
-        snippet: Option<S>,
-    ) -> Self
-    where
-        S: Into<String>,
-        T: Into<PathBuf>,
+    pub fn new<S, T>(message: S, span: Span, file: Option<T>,
+                     snippet: Option<S>)
+        -> Self
+        where S: Into<String>,
+              T: Into<PathBuf>
     {
-        LexerError {
-            message: message.into(),
-            span,
-            file: file.map(Into::into),
-            snippet: snippet.map(Into::into),
-            source: None,
-        }
+        LexerError { message: message.into(),
+                     span,
+                     file: file.map(Into::into),
+                     snippet: snippet.map(Into::into),
+                     source: None }
     }
 
     /// Creates a `LexerError` from only a message and span.
     ///
     /// This is useful when file or snippet context is not available.
     pub fn from_message<S>(message: S, span: Span) -> Self
-    where
-        S: Into<String>,
+        where S: Into<String>
     {
         Self::new(message, span, None::<PathBuf>, None::<S>)
     }
@@ -73,8 +70,7 @@ impl LexerError {
     ///
     /// This is helpful for diagnostics and tooling output.
     pub fn with_snippet<S>(mut self, snippet: S) -> Self
-    where
-        S: Into<String>,
+        where S: Into<String>
     {
         self.snippet = Some(snippet.into());
         self
@@ -82,8 +78,7 @@ impl LexerError {
 
     /// Attaches the path of the file in which the error occurred.
     pub fn with_file<T>(mut self, file: T) -> Self
-    where
-        T: Into<PathBuf>,
+        where T: Into<PathBuf>
    {
         self.file = Some(file.into());
         self
@@ -93,25 +88,28 @@ impl LexerError {
     ///
     /// This allows you to chain errors for more detailed diagnostics.
     pub fn with_source<E>(mut self, err: E) -> Self
-    where
-        E: Error + 'static,
+        where E: Error + 'static
     {
         self.source = Some(Box::new(err));
         self
     }
 }
 
-impl std::fmt::Display for LexerError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl std::fmt::Display for LexerError
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
+    {
         write!(f, "Lexer error at {}", self.span)?;
 
-        if let Some(file) = &self.file {
+        if let Some(file) = &self.file
+        {
             write!(f, " in file `{}`", file.display())?;
         }
 
         write!(f, ": {}", self.message)?;
 
-        if let Some(snippet) = &self.snippet {
+        if let Some(snippet) = &self.snippet
+        {
             write!(f, "\n --> Snippet: `{}`", snippet)?;
         }
 
@@ -119,9 +117,11 @@ impl std::fmt::Display for LexerError {
     }
 }
 
-impl Error for LexerError {
+impl Error for LexerError
+{
     /// Returns the underlying cause of this error, if any.
-    fn source(&self) -> Option<&(dyn Error + 'static)> {
+    fn source(&self) -> Option<&(dyn Error + 'static)>
+    {
         self.source.as_ref().map(|e| e.as_ref())
     }
 }
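
The `src/error.rs` changes are again stylistic: the grouped `use std::{...}` import is split, braces move to their own lines, and the multi-line `where` clauses are condensed. The builder API itself is unchanged; for reference, it chains like this (the message, file name, and snippet below are made-up illustrations):

```rust
use rune::{LexerError, Span};

fn main()
{
    // `from_message` covers the common case; `with_file` and `with_snippet`
    // attach optional context afterwards.
    let err = LexerError::from_message("unexpected symbol", Span::default())
        .with_file("spellbook.md")
        .with_snippet("cast *fireball");

    // Display prints the span, then the optional file, the message, and the
    // optional snippet, in that order.
    eprintln!("{}", err);
}
```
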
src/iter.rs (new file, 209 lines)

@@ -0,0 +1,209 @@
+//! An iterator adapter for arbitrary lookahead functionality.
+//!
+//! This module provides [`Lookahead`], an adapter for any iterator that allows
+//! you to peek ahead by any number of elements, without consuming them.
+//!
+//! ## Example
+//! ```
+//! use rune::LookaheadExt;
+//!
+//! let mut it = vec![10, 20, 30].into_iter().lookahead();
+//!
+//! assert_eq!(it.peek(0), Some(&10));
+//! assert_eq!(it.peek(1), Some(&20));
+//! assert_eq!(it.next(), Some(10));
+//! assert_eq!(it.peek(0), Some(&20));
+//! ```
+
+use std::collections::VecDeque;
+use std::fmt;
+use std::iter::{Fuse, FusedIterator};
+
+
+
+/// An iterator adapter that allows arbitrary lookahead peeking.
+///
+/// This struct wraps an iterator and buffers items so that any future
+/// item can be accessed by index without consuming them. Similar to
+/// [`std::iter::Peekable`], but supports peeking any number of steps ahead.
+pub struct Lookahead<I>
+    where I: Iterator
+{
+    iter: Fuse<I>,
+    buffer: VecDeque<I::Item>
+}
+
+
+
+impl<I> Lookahead<I> where I: Iterator
+{
+    /// Creates a new [`Lookahead`] from the given iterator.
+    ///
+    /// This constructor is typically used indirectly via the
+    /// [`LookaheadExt::lookahead()`] method or [`lookahead()`] free function.
+    #[must_use]
+    pub fn new(iter: I) -> Self
+    {
+        Lookahead { iter: iter.fuse(),
+                    buffer: VecDeque::new() }
+    }
+
+    /// Returns a reference to the `n`th upcoming item, if it exists.
+    ///
+    /// `peek(0)` is the same as peeking at the next item.
+    ///
+    /// This does **not consume** any items from the iterator.
+    ///
+    /// # Examples
+    /// ```
+    /// use rune::LookaheadExt;
+    ///
+    /// let mut it = vec![1, 2, 3].into_iter().lookahead();
+    ///
+    /// assert_eq!(it.peek(1), Some(&2));
+    /// assert_eq!(it.next(), Some(1));
+    /// ```
+    pub fn peek(&mut self, n: usize) -> Option<&I::Item>
+    {
+        while self.buffer.len() <= n
+        {
+            if let Some(item) = self.iter.next()
+            {
+                self.buffer.push_back(item);
+            }
+            else
+            {
+                break;
+            }
+        }
+        self.buffer.get(n)
+    }
+
+    /// Returns a mutable reference to the `n`th upcoming item, if it exists.
+    ///
+    /// This allows in-place modification of peeked items before consumption.
+    ///
+    /// # Examples
+    /// ```
+    /// use rune::LookaheadExt;
+    ///
+    /// let mut it = vec![1, 2, 3].into_iter().lookahead();
+    /// if let Some(x) = it.peek_mut(1)
+    /// {
+    ///     *x *= 10;
+    /// }
+    /// assert_eq!(it.next(), Some(1));
+    /// assert_eq!(it.next(), Some(20));
+    /// ```
+    pub fn peek_mut(&mut self, n: usize) -> Option<&mut I::Item>
+    {
+        while self.buffer.len() <= n
+        {
+            if let Some(item) = self.iter.next()
+            {
+                self.buffer.push_back(item);
+            }
+            else
+            {
+                break;
+            }
+        }
+        self.buffer.get_mut(n)
+    }
+}
+
+impl<I> Iterator for Lookahead<I> where I: Iterator
+{
+    type Item = I::Item;
+
+    /// Retrieves the next item, consuming it.
+    ///
+    /// If any items were previously peeked and buffered, they are returned
+    /// first before accessing the underlying iterator.
+    fn next(&mut self) -> Option<Self::Item>
+    {
+        if let Some(front) = self.buffer.pop_front()
+        {
+            Some(front)
+        }
+        else
+        {
+            self.iter.next()
+        }
+    }
+
+    /// Provides a size hint accounting for both buffered and remaining elements.
+    fn size_hint(&self) -> (usize, Option<usize>)
+    {
+        let (low, high) = self.iter.size_hint();
+        let buffered = self.buffer.len();
+        (low.saturating_add(buffered), high.and_then(|h| h.checked_add(buffered)))
+    }
+}
+
+impl<I> Clone for Lookahead<I>
+    where I: Iterator + Clone,
+          I::Item: Clone
+{
+    fn clone(&self) -> Self
+    {
+        Lookahead { iter: self.iter.clone(),
+                    buffer: self.buffer.clone() }
+    }
+}
+
+impl<I> fmt::Debug for Lookahead<I>
+    where I: Iterator + fmt::Debug,
+          I::Item: fmt::Debug
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
+    {
+        f.debug_struct("Lookahead")
+         .field("iter", &self.iter)
+         .field("buffer", &self.buffer)
+         .finish()
+    }
+}
+
+impl<I> FusedIterator for Lookahead<I> where I: Iterator + FusedIterator {}
+
+
+
+/// Extension trait to provide `.lookahead()` on all iterators.
+///
+/// This lets you easily call `.lookahead()` on any iterator to
+/// create a [`Lookahead`] instance.
+pub trait LookaheadExt: Iterator + Sized
+{
+    /// Wraps the iterator in a [`Lookahead`] adapter.
+    fn lookahead(self) -> Lookahead<Self>;
+}
+
+impl<I: Iterator> LookaheadExt for I
+{
+    fn lookahead(self) -> Lookahead<Self>
+    {
+        Lookahead::new(self)
+    }
+}
+
+
+
+/// Creates a [`Lookahead`] from any iterable.
+///
+/// This is a convenience function for use in functional-style code or
+/// when not using the extension trait.
+///
+/// # Example
+/// ```
+/// use rune::lookahead;
+///
+/// let mut it = lookahead(vec![1, 2, 3]);
+///
+/// assert_eq!(it.peek(2), Some(&3));
+/// ```
+pub fn lookahead<I>(iterable: I) -> Lookahead<I::IntoIter>
+    where I: IntoIterator
+{
+    Lookahead::new(iterable.into_iter())
+}
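
`Lookahead` fuses the wrapped iterator and buffers peeked items in a `VecDeque`, so `peek(n)` past the end simply returns `None` and never re-polls an exhausted source. Beyond the doc-tests above, a sketch of the two-element peek this enables, mirroring the `**` check in the Markdown example (the input string is illustrative):

```rust
use rune::LookaheadExt;

fn main()
{
    let mut chars = "**bold".chars().lookahead();

    // Decide between "**" and a lone "*" without consuming anything yet.
    if chars.peek(0) == Some(&'*') && chars.peek(1) == Some(&'*')
    {
        chars.next(); // consume the first '*'
        chars.next(); // consume the second '*'
        println!("strong emphasis start");
    }

    // Peeked-but-unconsumed items flow out of the buffer first.
    let rest: String = chars.collect();
    assert_eq!(rest, "bold");
}
```
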
src/lexer.rs (126 lines changed)

@@ -7,24 +7,25 @@ use super::token::{TokenStream, TokenType};
 
 
 
-/// The size of data chunks to read from a file. This was arbitrarily chosen to
-/// be 1mb.
+/// The size of data chunks to read from a file. This is an arbitrary choice,
+/// set to 1MB.
 const BUFFER_SIZE: usize = 1024 * 1024;
 
 
 
-/// The `Lexer` is the core component responsible for performing
-/// lexicographical analysis on a text file.
+/// The `Lexer` struct is responsible for performing lexical analysis
+/// (tokenization) on text.
 ///
-/// It reads input from a file character-by-character, generating a stream
-/// of base tokens such as text, numbers, whitespace, symbols, and newlines.
-/// These tokens are accumulated into a `TokenStream`, which is a flat,
-/// cache-friendly data structure.
+/// It processes input from a file or string character-by-character and
+/// generates a stream of tokens, such as text, numbers, whitespace, symbols,
+/// and newlines. These tokens are accumulated into a `TokenStream`, which is a
+/// flat, cache-friendly data structure designed for efficient iteration.
 ///
-/// After tokenization, the lexer applies a user-provided `transform` function
-/// to each token in the stream, allowing consumers of the library to convert
-/// base tokens into richer, domain-specific token types (e.g. Markdown
-/// elements, syntax trees, or custom DSL tokens).
+/// After the base tokens are generated, the `Lexer` allows for transformation
+/// of these tokens into richer, domain-specific types via a user-provided
+/// `transform` function. This transformation can be used to convert base tokens
+/// into specific elements of a Markdown syntax tree, custom DSL tokens, or any
+/// other custom format you need.
 ///
 /// # Example
 ///
@@ -38,32 +39,51 @@ const BUFFER_SIZE: usize = 1024 * 1024;
 ///
 /// let tokens = Lexer::scan_text("Runes += 42", transform).unwrap();
 ///
-/// // The tuple here is from the transform functions return type.
 /// for token in &tokens
 /// {
 ///     println!("{}", token);
 /// }
 /// ```
 ///
-/// # Design Notes
-///
-/// - Uses a flat `TokenStream` to improve iteration performance and reduce heap
+/// # Design Considerations
+/// - Utilizes a flat `TokenStream` to improve performance and reduce heap
 ///   overhead.
-/// - Consolidates contiguous characters into compound tokens (e.g. multi-digit
+/// - Consolidates contiguous characters into compound tokens (e.g., multi-digit
 ///   numbers).
-/// - Easily extensible via the `transform` function to support higher-level
-///   parsing tasks.
+/// - Extensible via the `transform` function, enabling the creation of
+///   higher-level constructs, like Markdown elements or syntax trees for a
+///   custom DSL.
 ///
-/// # Errors
-///
-/// Returns a `LexerError` if the file cannot be opened or read.
+/// # Error Handling
+/// The lexer will return a `LexerError` if the input file cannot be opened or
+/// read. Errors include issues such as missing files, read failures, or invalid
+/// input formats.
 pub enum Lexer {}
 
 
 
 impl Lexer
 {
-    /// Scans a file and produces a vector of transformed tokens.
+    /// Scans a file and generates a vector of transformed tokens based on the
+    /// provided `transform` function.
+    ///
+    /// This method opens a file from the given `path`, reads the file line by
+    /// line, and converts the input into a stream of tokens. The tokens are
+    /// then passed to the `transform` function, which allows users to map
+    /// base tokens into domain-specific types.
+    ///
+    /// # Parameters
+    /// - `path`: A path to the file to be lexically analyzed.
+    /// - `transform`: A function that takes a `TokenStream<TokenType>` and
+    ///   transforms it into a `TokenStream<T>` where `T` is a domain-specific
+    ///   type.
+    ///
+    /// # Returns
+    /// A `Result<TokenStream<T>, LexerError>` where `T` is the transformed token
+    /// type, or an error.
+    ///
+    /// # Errors
+    /// Returns a `LexerError` if the file cannot be opened or read.
     pub fn scan_file<P, F, T>(path: P, transform: F)
         -> Result<TokenStream<T>, LexerError>
         where P: AsRef<std::path::Path>,
@@ -82,6 +102,7 @@ impl Lexer
 
         let reader = BufReader::with_capacity(BUFFER_SIZE, input_file);
 
+        // Read the file line by line.
         for line in reader.lines()
        {
             match line
@@ -93,7 +114,7 @@ impl Lexer
                 Err(_) =>
                 {
                     return Err(LexerError::new("Unable to read line during \
-                                                Lexigraphical Analysis.",
+                                                Lexical Analysis.",
                                                Span::default(),
                                                Some(path.as_ref()
                                                         .to_string_lossy()
@@ -102,10 +123,10 @@ impl Lexer
                }
            }
 
+            // Add the newline token after each line.
            stream.push("\n".to_string(),
                        TokenType::Newline,
                        Span::with_single(cursor));
 
            cursor.line += 1;
            cursor.column = 0;
        }
@@ -113,7 +134,22 @@ impl Lexer
        Ok(transform(&stream))
    }
 
-    /// Scans a full in-memory string and returns transformed tokens.
+    /// Scans a full in-memory string and produces transformed tokens.
+    ///
+    /// This method tokenizes the input string `text` and returns the transformed
+    /// tokens using the provided `transform` function. It's a convenient way
+    /// to perform lexical analysis on in-memory strings without needing to
+    /// read from a file.
+    ///
+    /// # Parameters
+    /// - `text`: A string slice representing the in-memory input text to
+    ///   analyze.
+    /// - `transform`: A function that transforms the base tokens into
+    ///   domain-specific types.
+    ///
+    /// # Returns
+    /// A `Result<TokenStream<T>, LexerError>` where `T` is the transformed token
+    /// type, or an error.
    pub fn scan_text<F, T>(text: &str, transform: F)
        -> Result<TokenStream<T>, LexerError>
        where F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
@@ -121,30 +157,51 @@ impl Lexer
        let mut cursor = Position::default();
        let mut stream = TokenStream::new();
 
+        // Process each line in the input string.
        for line in text.lines()
        {
            Self::scan(line, &mut stream, &mut cursor);
 
+            // Add the newline token after each line.
            stream.push("\n".to_string(),
                        TokenType::Newline,
                        Span::with_single(cursor));
 
            cursor.line += 1;
            cursor.column = 0;
        }
 
+        // Remove the last newline character if the text did not end with a
+        // newline.
+        if !text.ends_with('\n')
+        {
+            stream.pop();
+        }
+
        Ok(transform(&stream))
    }
 
-    /// Internal: scans a single line of text into tokens.
+    /// Internal method that scans a single line of text into tokens.
+    ///
+    /// This method processes each character of a line and generates the
+    /// corresponding token. It handles cases like numeric tokens, text
+    /// tokens, symbols, and whitespace.
+    ///
+    /// # Parameters
+    /// - `line`: A line of text to be lexically analyzed.
+    /// - `stream`: A mutable reference to the token stream where the generated
+    ///   tokens will be pushed.
+    /// - `cursor`: A mutable reference to the cursor position, which tracks the
+    ///   current position in the input.
    fn scan(line: &str, stream: &mut TokenStream<TokenType>,
            cursor: &mut Position)
    {
        for c in line.chars()
        {
+            // Get the token type based on the character.
            let variant = get_token_type(c);
            let last = stream.len().saturating_sub(1);
 
+            // Handle token merging for contiguous tokens like numbers or text.
            if !stream.is_empty() &&
               variant == stream.variants[last] &&
               (variant == TokenType::Numeric || variant == TokenType::Text)
@@ -154,6 +211,7 @@ impl Lexer
            }
            else
            {
+                // Add a new token to the stream.
                stream.push(c.to_string(), variant, Span::with_single(*cursor));
            }
 
@@ -164,6 +222,18 @@ impl Lexer
 
 
 
+/// Determines the type of a token based on the current character.
+///
+/// This helper function is responsible for identifying whether the current
+/// character is part of a known token type such as numeric, text, whitespace,
+/// or symbol.
+///
+/// # Parameters
+/// - `curr_char`: The current character to analyze.
+///
+/// # Returns
+/// A `TokenType` corresponding to the character type (e.g., `Numeric`, `Text`,
+/// `Whitespace`, etc.).
 fn get_token_type(curr_char: char) -> TokenType
 {
     match curr_char
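
Besides the expanded doc comments, the one behavioral change in `src/lexer.rs` is at the end of `scan_text`: when the input does not end with `'\n'`, the final `Newline` token is now popped from the stream. That is why the expected token lists in the lexer tests further below lose their trailing `(TokenType::Newline, "\n")` entries. A sketch of the new behavior, using a pass-through closure standing in for the tests' `dummy_transform` (an assumption; the real helper lives in the test file, and the token-field access here mirrors the usage shown in the example and tests):

```rust
use rune::{Lexer, TokenStream, TokenType};

fn main()
{
    // Pass-through transform: copy each base token into a fresh stream.
    let identity = |input: &TokenStream<TokenType>| {
        let mut out = TokenStream::new();
        for t in input
        {
            out.push(String::from(t.lexeme), *t.variant, *t.span);
        }
        out
    };

    // No trailing '\n' in the input, so the final Newline token is popped:
    // only Text("magic"), Whitespace(" "), and Text("runes") remain.
    let tokens = Lexer::scan_text("magic runes", identity).unwrap();
    assert_eq!(tokens.len(), 3);
}
```
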
src/lib.rs

@@ -7,14 +7,18 @@
 
 pub mod library;
 
+mod ast;
 mod error;
+mod iter;
 mod lexer;
 mod position;
 mod token;
 
 
 
+pub use crate::ast::*;
 pub use crate::error::*;
+pub use crate::iter::*;
 pub use crate::lexer::*;
 pub use crate::position::*;
 pub use crate::token::*;

tests/iter.rs (new file, 43 lines)

@@ -0,0 +1,43 @@
+use rune::*;
+
+#[test]
+fn peek_works()
+{
+    let mut it = vec![1, 2, 3].into_iter().lookahead();
+    assert_eq!(it.peek(0), Some(&1));
+    assert_eq!(it.peek(1), Some(&2));
+    assert_eq!(it.peek(2), Some(&3));
+    assert_eq!(it.peek(3), None);
+}
+
+#[test]
+fn peek_mut_modifies_item()
+{
+    let mut it = vec![10, 20, 30].into_iter().lookahead();
+    if let Some(x) = it.peek_mut(1)
+    {
+        *x += 100;
+    }
+    assert_eq!(it.next(), Some(10));
+    assert_eq!(it.next(), Some(120));
+}
+
+#[test]
+fn iterates_correctly()
+{
+    let mut it = vec![1, 2, 3].into_iter().lookahead();
+    assert_eq!(it.next(), Some(1));
+    assert_eq!(it.next(), Some(2));
+    assert_eq!(it.next(), Some(3));
+    assert_eq!(it.next(), None);
+}
+
+#[test]
+fn size_hint_accounts_for_buffer()
+{
+    let mut it = vec![1, 2, 3, 4].into_iter().lookahead();
+    it.peek(2);
+    let (low, high) = it.size_hint();
+    assert!(low >= 4);
+    assert_eq!(high, Some(4));
+}
@@ -53,45 +53,47 @@ fn cleanup_temp_file(path: &PathBuf)
 
 
 #[test]
-fn test_basic_lexing()
+fn basic_lexing()
 {
     let tokens =
         Lexer::scan_text("magic runes", dummy_transform).expect("Lexer should \
                                                                  succeed");
 
-    let tokens = tokens.into_iter().map(|t| { (*t.variant, String::from(t.lexeme))}).collect::<Vec<_>>();
+    let tokens = tokens.into_iter()
+                       .map(|t| (*t.variant, String::from(t.lexeme)))
+                       .collect::<Vec<_>>();
 
     let expected = vec![(TokenType::Text, "magic".to_string()),
                         (TokenType::Whitespace, " ".to_string()),
-                        (TokenType::Text, "runes".to_string()),
-                        (TokenType::Newline, "\n".to_string()),];
+                        (TokenType::Text, "runes".to_string()),];
 
     assert_eq!(tokens, expected);
 }
 
 
 #[test]
-fn test_symbols_and_numbers()
+fn symbols_and_numbers()
 {
     let tokens =
         Lexer::scan_text("13 + 37", dummy_transform).expect("Lexer should \
                                                              succeed");
 
-    let tokens = tokens.into_iter().map(|t| { (*t.variant, String::from(t.lexeme))}).collect::<Vec<_>>();
+    let tokens = tokens.into_iter()
+                       .map(|t| (*t.variant, String::from(t.lexeme)))
+                       .collect::<Vec<_>>();
 
     let expected = vec![(TokenType::Numeric, "13".into()),
                         (TokenType::Whitespace, " ".into()),
                         (TokenType::Symbol, "+".into()),
                         (TokenType::Whitespace, " ".into()),
-                        (TokenType::Numeric, "37".into()),
-                        (TokenType::Newline, "\n".into()),];
+                        (TokenType::Numeric, "37".into()),];
 
     assert_eq!(tokens, expected);
 }
 
 
 #[test]
-fn test_lexer_with_cases()
+fn lexer_with_cases()
 {
     let cases = vec![TestCase { name: "simple_words",
                                 input: "magic rune",
@@ -129,16 +131,16 @@ fn test_lexer_with_cases()
                                              on case '{}'",
                                             case.name));
 
-        let result = result.into_iter().map(|t| { (*t.variant, String::from(t.lexeme))}).collect::<Vec<_>>();
+        let result = result.into_iter()
+                           .map(|t| (*t.variant, String::from(t.lexeme)))
+                           .collect::<Vec<_>>();
 
         let expected = case.expected
                            .iter()
                            .map(|(ty, s)| (*ty, s.to_string()))
                            .collect::<Vec<_>>();
 
-        assert_eq!(result, expected,
-                   "Mismatch in test case '{}'",
-                   case.name);
+        assert_eq!(result, expected, "Mismatch in test case '{}'", case.name);
 
         cleanup_temp_file(&path);
     }