[#4] The initial AST.
This is the initial design of the AST, built in a data-oriented style. It still needs iterators over the AST and the optimized AST, as well as some more transformation functions.
This commit is contained in:
parent
f5780f50c2
commit
e604bf172b
327
src/ast.rs
Normal file
327
src/ast.rs
Normal file
@ -0,0 +1,327 @@
|
|||||||
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
|
use super::position::Span;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// Handle that identifies a single node of an AST.
///
/// A `NodeId` is nothing more than the node's position in the AST's node and
/// parent arrays, so it is only meaningful for the AST it was obtained from.
pub type NodeId = usize;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// The linear layouts in which the nodes of an AST can be stored for
/// traversal.
///
/// The type is a plain, field-less enum; it is `Copy`, comparable and
/// hashable so it can be passed around freely and used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TraversalOrder
{
    /// Depth-first search (pre-order) layout.
    DFS,
    /// Breadth-first search layout.
    BFS
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// The data associated with a single node in the AST.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct NodeData<T>
|
||||||
|
{
|
||||||
|
pub span: Span,
|
||||||
|
pub data: T
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The mutable AST structure used during parsing. Nodes are created
|
||||||
|
/// incrementally and linked via parent relationships. Traversal order is not
|
||||||
|
/// guaranteed until `optimize()` is called.
|
||||||
|
pub struct Ast<T>
|
||||||
|
{
|
||||||
|
nodes: Vec<NodeData<T>>,
|
||||||
|
parents: Vec<Option<NodeId>>
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An optimized, immutable AST layout produced from `Ast<T>::optimize`.
|
||||||
|
/// This structure is ideal for traversal, analysis, and code generation.
|
||||||
|
pub struct OptimizedAst<T>
|
||||||
|
{
|
||||||
|
/// Node data in a linear layout (DFS or BFS order).
|
||||||
|
pub nodes: Vec<NodeData<T>>,
|
||||||
|
/// Each node’s parent, if any.
|
||||||
|
pub parents: Vec<Option<NodeId>>,
|
||||||
|
/// The traversal order the nodes are stored in.
|
||||||
|
pub order: TraversalOrder
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
impl<T> Ast<T>
|
||||||
|
{
|
||||||
|
/// Creates a new, empty AST.
|
||||||
|
pub fn new() -> Self
|
||||||
|
{
|
||||||
|
Ast { nodes: Vec::new(),
|
||||||
|
parents: Vec::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the parent of a node, if any.
|
||||||
|
pub fn get_parent(&self, id: NodeId) -> Option<NodeId>
|
||||||
|
{
|
||||||
|
self.parents.get(id).copied().flatten()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a reference to the node data at the given ID, if it exists.
|
||||||
|
pub fn get(&self, id: NodeId) -> Option<&NodeData<T>>
|
||||||
|
{
|
||||||
|
self.nodes.get(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a mutable reference to the node data at the given ID, if it
|
||||||
|
/// exists.
|
||||||
|
pub fn get_mut(&mut self, id: NodeId) -> Option<&mut NodeData<T>>
|
||||||
|
{
|
||||||
|
self.nodes.get_mut(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds a new node to the AST.
|
||||||
|
///
|
||||||
|
/// - `data`: The custom payload of the node (usually an enum or struct).
|
||||||
|
/// - `span`: The source span the node represents.
|
||||||
|
/// - `parent`: Optional parent NodeId to attach this node to.
|
||||||
|
///
|
||||||
|
/// Returns the NodeId of the newly added node.
|
||||||
|
pub fn add_node(&mut self, data: T, span: Span, parent: Option<NodeId>)
|
||||||
|
-> NodeId
|
||||||
|
{
|
||||||
|
let id = self.nodes.len();
|
||||||
|
self.nodes.push(NodeData { data, span });
|
||||||
|
self.parents.push(parent);
|
||||||
|
id
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Joins another AST into this one, returning a mapping from old node IDs
|
||||||
|
/// in `other` to new node IDs in `self`.
|
||||||
|
///
|
||||||
|
/// Optionally attaches all root nodes of the other AST to a parent node
|
||||||
|
/// in the current AST.
|
||||||
|
pub fn join(&mut self, other: Ast<T>, attach_to: Option<NodeId>)
|
||||||
|
-> Vec<NodeId>
|
||||||
|
{
|
||||||
|
let base_id = self.nodes.len();
|
||||||
|
let mut id_map = Vec::with_capacity(other.nodes.len());
|
||||||
|
|
||||||
|
for (i, node) in other.nodes.into_iter().enumerate()
|
||||||
|
{
|
||||||
|
self.nodes.push(node);
|
||||||
|
let new_parent = match other.parents[i]
|
||||||
|
{
|
||||||
|
Some(pid) => Some(base_id + pid),
|
||||||
|
None => attach_to // attach root nodes to given parent if provided
|
||||||
|
};
|
||||||
|
self.parents.push(new_parent);
|
||||||
|
id_map.push(base_id + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
id_map
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Prunes the subtree rooted at `root`, compacting the AST in place.
|
||||||
|
/// Node IDs will change after this operation.
|
||||||
|
pub fn prune(&mut self, root: NodeId)
|
||||||
|
{
|
||||||
|
let mut to_remove = Vec::new();
|
||||||
|
collect_descendants(root, &self.parents, &mut to_remove);
|
||||||
|
to_remove.push(root);
|
||||||
|
|
||||||
|
let mut is_removed = vec![false; self.nodes.len()];
|
||||||
|
for &id in &to_remove
|
||||||
|
{
|
||||||
|
is_removed[id] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut remap = vec![None; self.nodes.len()];
|
||||||
|
let mut next_insert = 0;
|
||||||
|
|
||||||
|
for i in 0..self.nodes.len()
|
||||||
|
{
|
||||||
|
if !is_removed[i]
|
||||||
|
{
|
||||||
|
if i != next_insert
|
||||||
|
{
|
||||||
|
self.nodes.swap(i, next_insert);
|
||||||
|
self.parents.swap(i, next_insert);
|
||||||
|
}
|
||||||
|
remap[i] = Some(next_insert);
|
||||||
|
next_insert += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.nodes.truncate(next_insert);
|
||||||
|
self.parents.truncate(next_insert);
|
||||||
|
|
||||||
|
for parent in self.parents.iter_mut()
|
||||||
|
{
|
||||||
|
if let Some(pid) = *parent
|
||||||
|
{
|
||||||
|
*parent = remap[pid];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Optimizes the AST layout for a specific traversal order (DFS or BFS).
|
||||||
|
///
|
||||||
|
/// This consumes the `Ast`, rearranges the internal storage so that
|
||||||
|
/// iterating over the nodes reflects the chosen traversal order, and
|
||||||
|
/// returns a new, immutable `OptimizedAst<T>`.
|
||||||
|
///
|
||||||
|
/// No need for `T: Clone` anymore, since we will move data instead of
|
||||||
|
/// cloning it.
|
||||||
|
pub fn optimize(self, order: TraversalOrder) -> OptimizedAst<T>
|
||||||
|
{
|
||||||
|
let ordering = match order
|
||||||
|
{
|
||||||
|
TraversalOrder::DFS => dfs_order(&self.parents),
|
||||||
|
TraversalOrder::BFS => bfs_order(&self.parents)
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut remap = vec![0; self.nodes.len()];
|
||||||
|
for (new_id, &old_id) in ordering.iter().enumerate()
|
||||||
|
{
|
||||||
|
remap[old_id] = new_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrap nodes in Option to allow taking them by value without cloning
|
||||||
|
let mut nodes_opt: Vec<Option<NodeData<T>>> =
|
||||||
|
self.nodes.into_iter().map(Some).collect();
|
||||||
|
|
||||||
|
let mut new_nodes = Vec::with_capacity(nodes_opt.len());
|
||||||
|
let mut new_parents = vec![None; self.parents.len()];
|
||||||
|
|
||||||
|
for &old_id in &ordering
|
||||||
|
{
|
||||||
|
let new_id = remap[old_id];
|
||||||
|
let node = nodes_opt[old_id].take()
|
||||||
|
.expect("Node was already moved out");
|
||||||
|
|
||||||
|
let parent = self.parents[old_id].map(|pid| remap[pid]);
|
||||||
|
|
||||||
|
new_nodes.push(node);
|
||||||
|
new_parents[new_id] = parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
OptimizedAst { nodes: new_nodes,
|
||||||
|
parents: new_parents,
|
||||||
|
order }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// Helper to recursively collect all descendants of a node.
|
||||||
|
fn collect_descendants(root: NodeId, parents: &[Option<NodeId>],
|
||||||
|
acc: &mut Vec<NodeId>)
|
||||||
|
{
|
||||||
|
for (i, &parent) in parents.iter().enumerate()
|
||||||
|
{
|
||||||
|
if parent == Some(root)
|
||||||
|
{
|
||||||
|
collect_descendants(i, parents, acc);
|
||||||
|
acc.push(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recursively visits nodes in a depth-first (pre-order) manner starting from
|
||||||
|
/// `current`, building up the DFS traversal order.
|
||||||
|
///
|
||||||
|
/// - `current`: The current node ID being visited.
|
||||||
|
/// - `parents`: A slice representing the parent relationship for each node
|
||||||
|
/// (index = child, value = optional parent).
|
||||||
|
/// - `order`: A mutable vector that will accumulate the DFS traversal order.
|
||||||
|
/// - `visited`: A mutable slice used to track which nodes have already been
|
||||||
|
/// visited.
|
||||||
|
fn visit(current: NodeId, parents: &[Option<NodeId>], order: &mut Vec<NodeId>,
|
||||||
|
visited: &mut [bool])
|
||||||
|
{
|
||||||
|
// Skip this node if it's already been visited
|
||||||
|
if visited[current]
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark the node as visited to avoid reprocessing it
|
||||||
|
visited[current] = true;
|
||||||
|
|
||||||
|
// Add this node to the traversal order (pre-order)
|
||||||
|
order.push(current);
|
||||||
|
|
||||||
|
// Recursively visit all child nodes (i.e., nodes that list `current` as
|
||||||
|
// their parent)
|
||||||
|
for (i, &parent) in parents.iter().enumerate()
|
||||||
|
{
|
||||||
|
if parent == Some(current)
|
||||||
|
{
|
||||||
|
visit(i, parents, order, visited);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the DFS (depth-first, pre-order) ordering of nodes based on the
|
||||||
|
/// parent table.
|
||||||
|
///
|
||||||
|
/// Returns a Vec<NodeId> containing the node IDs in DFS order.
|
||||||
|
fn dfs_order(parents: &[Option<NodeId>]) -> Vec<NodeId>
|
||||||
|
{
|
||||||
|
let mut order = Vec::new();
|
||||||
|
let mut visited = vec![false; parents.len()];
|
||||||
|
|
||||||
|
// Start DFS from all root nodes (nodes with no parent).
|
||||||
|
for (i, &parent) in parents.iter().enumerate()
|
||||||
|
{
|
||||||
|
if parent.is_none()
|
||||||
|
{
|
||||||
|
visit(i, parents, &mut order, &mut visited);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
order
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the BFS (breadth-first) ordering of nodes based on the parent
|
||||||
|
/// table.
|
||||||
|
///
|
||||||
|
/// Returns a Vec<NodeId> containing the node IDs in BFS order.
|
||||||
|
fn bfs_order(parents: &[Option<NodeId>]) -> Vec<NodeId>
|
||||||
|
{
|
||||||
|
let mut order = Vec::new();
|
||||||
|
let mut visited = vec![false; parents.len()];
|
||||||
|
let mut queue = VecDeque::new();
|
||||||
|
|
||||||
|
// Start BFS from all root nodes.
|
||||||
|
for (i, &parent) in parents.iter().enumerate()
|
||||||
|
{
|
||||||
|
if parent.is_none()
|
||||||
|
{
|
||||||
|
queue.push_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(current) = queue.pop_front()
|
||||||
|
{
|
||||||
|
if visited[current]
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
visited[current] = true;
|
||||||
|
order.push(current);
|
||||||
|
|
||||||
|
for (i, &parent) in parents.iter().enumerate()
|
||||||
|
{
|
||||||
|
if parent == Some(current)
|
||||||
|
{
|
||||||
|
queue.push_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
order
|
||||||
|
}
|
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
pub mod library;
|
pub mod library;
|
||||||
|
|
||||||
|
mod ast;
|
||||||
mod error;
|
mod error;
|
||||||
mod lexer;
|
mod lexer;
|
||||||
mod position;
|
mod position;
|
||||||
@ -14,6 +15,7 @@ mod token;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
pub use crate::ast::*;
|
||||||
pub use crate::error::*;
|
pub use crate::error::*;
|
||||||
pub use crate::lexer::*;
|
pub use crate::lexer::*;
|
||||||
pub use crate::position::*;
|
pub use crate::position::*;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user