[#4] The initial AST.

This is the initial design of the AST. It is built in a data-oriented
style. It still needs iterators over the AST and the optimized AST,
as well as some more transformation functions.
This commit is contained in:
Myrddin Dundragon 2025-04-22 02:20:20 -04:00
parent f5780f50c2
commit e604bf172b
2 changed files with 329 additions and 0 deletions

327
src/ast.rs Normal file
View File

@ -0,0 +1,327 @@
use std::collections::VecDeque;
use super::position::Span;
/// A unique identifier for a node in the AST.
///
/// Internally, this is just an index into the AST's parallel node and parent
/// arrays. An ID is only meaningful for the AST that produced it, and
/// `Ast::prune` documents that node IDs change after it runs.
pub type NodeId = usize;
/// Selects the linear layout an AST is rearranged into when it is optimized.
///
/// The variant chosen decides which traversal a plain front-to-back walk over
/// the optimized node array corresponds to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TraversalOrder
{
    /// Depth-first (pre-order) layout: a node is stored before its subtree.
    DFS,

    /// Breadth-first layout: nodes are stored level by level.
    BFS,
}
/// Everything stored for one AST node: where it came from in the source and
/// the caller-defined payload attached to it.
#[derive(Debug, Clone)]
pub struct NodeData<T>
{
    /// The source span this node represents.
    pub span: Span,

    /// The custom payload carried by the node.
    pub data: T,
}
/// The mutable AST structure used during parsing. Nodes are created
/// incrementally and linked via parent relationships. Traversal order is not
/// guaranteed until `optimize()` is called.
///
/// The tree is stored as two parallel arrays indexed by [`NodeId`]: entry `i`
/// of `nodes` holds the data of node `i`, and entry `i` of `parents` holds
/// that node's parent (`None` for a root).
///
/// `Debug` and `Clone` are derived so the tree can be inspected and copied
/// whenever the payload type `T` supports it.
#[derive(Debug, Clone)]
pub struct Ast<T>
{
    /// Per-node span and payload, indexed by `NodeId`.
    nodes: Vec<NodeData<T>>,

    /// Per-node parent link, indexed by `NodeId`; `None` marks a root.
    parents: Vec<Option<NodeId>>,
}
/// An optimized AST layout produced from `Ast<T>::optimize`.
/// This structure is intended for traversal, analysis, and code generation.
///
/// `nodes` and `parents` are parallel arrays indexed by [`NodeId`].
///
/// `Debug` and `Clone` are derived so the tree can be inspected and copied
/// whenever the payload type `T` supports it.
#[derive(Debug, Clone)]
pub struct OptimizedAst<T>
{
    /// Node data in a linear layout (DFS or BFS order).
    pub nodes: Vec<NodeData<T>>,

    /// Each node's parent, if any.
    pub parents: Vec<Option<NodeId>>,

    /// The traversal order the nodes are stored in.
    pub order: TraversalOrder,
}
impl<T> Ast<T>
{
    /// Creates a new, empty AST.
    pub fn new() -> Self
    {
        Ast { nodes: Vec::new(),
              parents: Vec::new() }
    }

    /// Returns the parent of a node, if any.
    ///
    /// Yields `None` both for root nodes and for IDs that are out of range.
    pub fn get_parent(&self, id: NodeId) -> Option<NodeId>
    {
        self.parents.get(id).copied().flatten()
    }

    /// Returns a reference to the node data at the given ID, if it exists.
    pub fn get(&self, id: NodeId) -> Option<&NodeData<T>>
    {
        self.nodes.get(id)
    }

    /// Returns a mutable reference to the node data at the given ID, if it
    /// exists.
    pub fn get_mut(&mut self, id: NodeId) -> Option<&mut NodeData<T>>
    {
        self.nodes.get_mut(id)
    }

    /// Adds a new node to the AST.
    ///
    /// - `data`: The custom payload of the node (usually an enum or struct).
    /// - `span`: The source span the node represents.
    /// - `parent`: Optional parent NodeId to attach this node to. The ID is
    ///   stored as given; it is not validated here.
    ///
    /// Returns the NodeId of the newly added node.
    pub fn add_node(&mut self, data: T, span: Span, parent: Option<NodeId>)
                    -> NodeId
    {
        // The new node lands at the end of both parallel arrays, so its ID is
        // the length before the push.
        let id = self.nodes.len();
        self.nodes.push(NodeData { span, data });
        self.parents.push(parent);
        id
    }

    /// Joins another AST into this one, returning a mapping from old node IDs
    /// in `other` to new node IDs in `self`.
    ///
    /// Entry `i` of the returned vector is the ID that node `i` of `other`
    /// now has in `self`.
    ///
    /// Optionally attaches all root nodes of the other AST to a parent node
    /// in the current AST. With `attach_to == None` they stay roots.
    pub fn join(&mut self, other: Ast<T>, attach_to: Option<NodeId>)
                -> Vec<NodeId>
    {
        let base_id = self.nodes.len();
        let Ast { nodes, parents } = other;

        // Nodes are appended in order, so old ID `i` becomes `base_id + i`.
        let id_map: Vec<NodeId> =
            (base_id..base_id + nodes.len()).collect();

        self.nodes.extend(nodes);
        self.parents.extend(parents.into_iter().map(|parent| {
            match parent
            {
                // Internal links are shifted by the same offset as the nodes.
                Some(pid) => Some(base_id + pid),
                // Former roots hang off the requested attachment point.
                None => attach_to
            }
        }));

        id_map
    }

    /// Prunes the subtree rooted at `root`, compacting the AST in place.
    /// Node IDs will change after this operation.
    ///
    /// The surviving nodes keep their relative order. An out-of-range `root`
    /// is ignored and leaves the AST untouched.
    pub fn prune(&mut self, root: NodeId)
    {
        let len = self.nodes.len();
        if root >= len
        {
            return;
        }

        // Mark the root and everything below it for removal.
        let mut doomed = Vec::new();
        collect_descendants(root, &self.parents, &mut doomed);
        doomed.push(root);

        let mut is_removed = vec![false; len];
        for &id in &doomed
        {
            is_removed[id] = true;
        }

        // Old ID -> new ID for survivors; `None` for removed nodes.
        let mut remap: Vec<Option<NodeId>> = vec![None; len];
        let mut next_id = 0;
        for (old_id, &gone) in is_removed.iter().enumerate()
        {
            if !gone
            {
                remap[old_id] = Some(next_id);
                next_id += 1;
            }
        }

        // `retain` visits the elements once, in order, so walking the flags
        // alongside it compacts both arrays identically.
        let mut flags = is_removed.iter().copied();
        self.nodes.retain(|_| !flags.next().unwrap_or(true));
        let mut flags = is_removed.iter().copied();
        self.parents.retain(|_| !flags.next().unwrap_or(true));

        // Rewrite the surviving parent links to the compacted IDs. A link to
        // a removed or non-existent node becomes `None`.
        for parent in self.parents.iter_mut()
        {
            *parent = (*parent).and_then(|pid| remap.get(pid).copied().flatten());
        }
    }

    /// Optimizes the AST layout for a specific traversal order (DFS or BFS).
    ///
    /// This consumes the `Ast`, rearranges the internal storage so that
    /// iterating over the nodes reflects the chosen traversal order, and
    /// returns a new `OptimizedAst<T>`.
    ///
    /// Node data is moved rather than cloned, so `T` does not need to be
    /// `Clone`. Nodes that cannot be reached from any root are dropped; the
    /// returned `nodes` and `parents` always have the same length.
    pub fn optimize(self, order: TraversalOrder) -> OptimizedAst<T>
    {
        let Ast { nodes, parents } = self;

        let ordering = match order
        {
            TraversalOrder::DFS => dfs_order(&parents),
            TraversalOrder::BFS => bfs_order(&parents)
        };

        // Old ID -> new ID; stays `None` for nodes the traversal never
        // reached.
        let mut remap: Vec<Option<NodeId>> = vec![None; nodes.len()];
        for (new_id, &old_id) in ordering.iter().enumerate()
        {
            remap[old_id] = Some(new_id);
        }

        // Wrap nodes in Option to allow taking them by value without cloning.
        let mut slots: Vec<Option<NodeData<T>>> =
            nodes.into_iter().map(Some).collect();

        // Both outputs are built by pushing in traversal order, so they stay
        // parallel and exactly as long as the traversal.
        let mut new_nodes = Vec::with_capacity(ordering.len());
        let mut new_parents = Vec::with_capacity(ordering.len());
        for &old_id in &ordering
        {
            let node = slots[old_id].take()
                                    .expect("Node was already moved out");
            new_nodes.push(node);
            new_parents.push(parents[old_id].and_then(|pid| remap[pid]));
        }

        OptimizedAst { nodes: new_nodes,
                       parents: new_parents,
                       order }
    }
}

impl<T> Default for Ast<T>
{
    /// Equivalent to [`Ast::new`]: an AST with no nodes.
    fn default() -> Self
    {
        Self::new()
    }
}
/// Helper to collect all descendants of a node.
///
/// - `root`: The node whose descendants are wanted. `root` itself is never
///   added to `acc`.
/// - `parents`: The parent table (index = child, value = optional parent).
/// - `acc`: Receives the descendant IDs, appended in post-order: for each
///   child in ascending ID order, first the child's own descendants, then
///   the child.
///
/// The walk is iterative and each node is visited at most once, so deep
/// trees cannot overflow the call stack and malformed tables (a node listed
/// as its own ancestor) still terminate.
fn collect_descendants(root: NodeId, parents: &[Option<NodeId>],
                       acc: &mut Vec<NodeId>)
{
    // Invert the parent table once so each node's children can be listed
    // without rescanning the whole table. Children end up in ascending order.
    let mut children: Vec<Vec<NodeId>> = vec![Vec::new(); parents.len()];
    for (child, &parent) in parents.iter().enumerate()
    {
        if let Some(pid) = parent
        {
            if let Some(kids) = children.get_mut(pid)
            {
                kids.push(child);
            }
        }
    }

    let mut seen = vec![false; parents.len()];
    if let Some(flag) = seen.get_mut(root)
    {
        *flag = true;
    }

    // Each frame is (node, index of the next child of that node to look at).
    let mut stack: Vec<(NodeId, usize)> = vec![(root, 0)];
    while let Some(frame) = stack.last_mut()
    {
        let (node, next) = *frame;
        match children.get(node).and_then(|kids| kids.get(next))
        {
            Some(&child) =>
            {
                frame.1 += 1;
                if !seen[child]
                {
                    seen[child] = true;
                    stack.push((child, 0));
                }
            }
            None =>
            {
                // All children handled: emit the node after its subtree.
                stack.pop();
                if node != root
                {
                    acc.push(node);
                }
            }
        }
    }
}
/// Visits the subtree rooted at `current` in a depth-first (pre-order)
/// manner, appending every node it reaches to the DFS traversal order.
///
/// - `current`: The node ID the walk starts from.
/// - `children`: For each node ID, the IDs of its children in ascending
///   order.
/// - `order`: A mutable vector that accumulates the DFS traversal order.
///
/// The walk uses an explicit stack instead of recursion, so the depth of the
/// tree is not limited by the call stack.
fn visit(current: NodeId, children: &[Vec<NodeId>], order: &mut Vec<NodeId>)
{
    let mut stack = vec![current];
    while let Some(node) = stack.pop()
    {
        order.push(node);
        // Push children in reverse so the lowest ID is popped (visited)
        // first.
        stack.extend(children[node].iter().rev().copied());
    }
}

/// Computes the DFS (depth-first, pre-order) ordering of nodes based on the
/// parent table.
///
/// Roots (nodes with no parent) are processed in ascending ID order, and so
/// are the children of every node. Nodes that cannot be reached from a root
/// do not appear in the result.
///
/// Returns a Vec<NodeId> containing the node IDs in DFS order.
fn dfs_order(parents: &[Option<NodeId>]) -> Vec<NodeId>
{
    // Invert the parent table once instead of rescanning it for every node.
    // Every node sits in at most one child list, so the walk below cannot
    // reach a node twice and needs no visited set.
    let mut children: Vec<Vec<NodeId>> = vec![Vec::new(); parents.len()];
    for (id, &parent) in parents.iter().enumerate()
    {
        if let Some(pid) = parent
        {
            // A parent ID outside the table leaves the node unreachable.
            if let Some(kids) = children.get_mut(pid)
            {
                kids.push(id);
            }
        }
    }

    let mut order = Vec::with_capacity(parents.len());
    // Start DFS from all root nodes (nodes with no parent).
    for (id, parent) in parents.iter().enumerate()
    {
        if parent.is_none()
        {
            visit(id, &children, &mut order);
        }
    }
    order
}
/// Computes the BFS (breadth-first) ordering of nodes based on the parent
/// table.
///
/// Roots (nodes with no parent) are queued in ascending ID order, and the
/// children of every node are queued in ascending ID order as well. Nodes
/// that cannot be reached from a root do not appear in the result.
///
/// Returns a Vec<NodeId> containing the node IDs in BFS order.
fn bfs_order(parents: &[Option<NodeId>]) -> Vec<NodeId>
{
    // Invert the parent table once instead of rescanning it for every
    // dequeued node. Every node sits in at most one child list and roots sit
    // in none, so no node can be queued twice and no visited set is needed.
    let mut children: Vec<Vec<NodeId>> = vec![Vec::new(); parents.len()];
    let mut queue = VecDeque::new();
    for (id, &parent) in parents.iter().enumerate()
    {
        match parent
        {
            // Start BFS from all root nodes.
            None => queue.push_back(id),
            Some(pid) =>
            {
                // A parent ID outside the table leaves the node unreachable.
                if let Some(kids) = children.get_mut(pid)
                {
                    kids.push(id);
                }
            }
        }
    }

    let mut order = Vec::with_capacity(parents.len());
    while let Some(current) = queue.pop_front()
    {
        order.push(current);
        queue.extend(children[current].iter().copied());
    }
    order
}

View File

@ -7,6 +7,7 @@
pub mod library;
mod ast;
mod error;
mod lexer;
mod position;
@ -14,6 +15,7 @@ mod token;
pub use crate::ast::*;
pub use crate::error::*;
pub use crate::lexer::*;
pub use crate::position::*;