diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..9337e96 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,327 @@ +use std::collections::VecDeque; + +use super::position::Span; + + + +/// A unique identifier for a node in the AST. Internally, this is just an index +/// into the node arrays. +pub type NodeId = usize; + + + +/// The possible orders in which an AST may be stored for traversal. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TraversalOrder +{ + /// Depth-first search (pre-order) layout. + DFS, + /// Breadth-first search layout. + BFS +} + + + +/// The data associated with a single node in the AST. +#[derive(Debug, Clone)] +pub struct NodeData +{ + pub span: Span, + pub data: T +} + +/// The mutable AST structure used during parsing. Nodes are created +/// incrementally and linked via parent relationships. Traversal order is not +/// guaranteed until `optimize()` is called. +pub struct Ast +{ + nodes: Vec>, + parents: Vec> +} + +/// An optimized, immutable AST layout produced from `Ast::optimize`. +/// This structure is ideal for traversal, analysis, and code generation. +pub struct OptimizedAst +{ + /// Node data in a linear layout (DFS or BFS order). + pub nodes: Vec>, + /// Each node’s parent, if any. + pub parents: Vec>, + /// The traversal order the nodes are stored in. + pub order: TraversalOrder +} + + + +impl Ast +{ + /// Creates a new, empty AST. + pub fn new() -> Self + { + Ast { nodes: Vec::new(), + parents: Vec::new() } + } + + /// Returns the parent of a node, if any. + pub fn get_parent(&self, id: NodeId) -> Option + { + self.parents.get(id).copied().flatten() + } + + /// Returns a reference to the node data at the given ID, if it exists. + pub fn get(&self, id: NodeId) -> Option<&NodeData> + { + self.nodes.get(id) + } + + /// Returns a mutable reference to the node data at the given ID, if it + /// exists. + pub fn get_mut(&mut self, id: NodeId) -> Option<&mut NodeData> + { + self.nodes.get_mut(id) + } + + /// Adds a new node to the AST. + /// + /// - `data`: The custom payload of the node (usually an enum or struct). + /// - `span`: The source span the node represents. + /// - `parent`: Optional parent NodeId to attach this node to. + /// + /// Returns the NodeId of the newly added node. + pub fn add_node(&mut self, data: T, span: Span, parent: Option) + -> NodeId + { + let id = self.nodes.len(); + self.nodes.push(NodeData { data, span }); + self.parents.push(parent); + id + } + + /// Joins another AST into this one, returning a mapping from old node IDs + /// in `other` to new node IDs in `self`. + /// + /// Optionally attaches all root nodes of the other AST to a parent node + /// in the current AST. + pub fn join(&mut self, other: Ast, attach_to: Option) + -> Vec + { + let base_id = self.nodes.len(); + let mut id_map = Vec::with_capacity(other.nodes.len()); + + for (i, node) in other.nodes.into_iter().enumerate() + { + self.nodes.push(node); + let new_parent = match other.parents[i] + { + Some(pid) => Some(base_id + pid), + None => attach_to // attach root nodes to given parent if provided + }; + self.parents.push(new_parent); + id_map.push(base_id + i); + } + + id_map + } + + /// Prunes the subtree rooted at `root`, compacting the AST in place. + /// Node IDs will change after this operation. + pub fn prune(&mut self, root: NodeId) + { + let mut to_remove = Vec::new(); + collect_descendants(root, &self.parents, &mut to_remove); + to_remove.push(root); + + let mut is_removed = vec![false; self.nodes.len()]; + for &id in &to_remove + { + is_removed[id] = true; + } + + let mut remap = vec![None; self.nodes.len()]; + let mut next_insert = 0; + + for i in 0..self.nodes.len() + { + if !is_removed[i] + { + if i != next_insert + { + self.nodes.swap(i, next_insert); + self.parents.swap(i, next_insert); + } + remap[i] = Some(next_insert); + next_insert += 1; + } + } + + self.nodes.truncate(next_insert); + self.parents.truncate(next_insert); + + for parent in self.parents.iter_mut() + { + if let Some(pid) = *parent + { + *parent = remap[pid]; + } + } + } + + /// Optimizes the AST layout for a specific traversal order (DFS or BFS). + /// + /// This consumes the `Ast`, rearranges the internal storage so that + /// iterating over the nodes reflects the chosen traversal order, and + /// returns a new, immutable `OptimizedAst`. + /// + /// No need for `T: Clone` anymore, since we will move data instead of + /// cloning it. + pub fn optimize(self, order: TraversalOrder) -> OptimizedAst + { + let ordering = match order + { + TraversalOrder::DFS => dfs_order(&self.parents), + TraversalOrder::BFS => bfs_order(&self.parents) + }; + + let mut remap = vec![0; self.nodes.len()]; + for (new_id, &old_id) in ordering.iter().enumerate() + { + remap[old_id] = new_id; + } + + // Wrap nodes in Option to allow taking them by value without cloning + let mut nodes_opt: Vec>> = + self.nodes.into_iter().map(Some).collect(); + + let mut new_nodes = Vec::with_capacity(nodes_opt.len()); + let mut new_parents = vec![None; self.parents.len()]; + + for &old_id in &ordering + { + let new_id = remap[old_id]; + let node = nodes_opt[old_id].take() + .expect("Node was already moved out"); + + let parent = self.parents[old_id].map(|pid| remap[pid]); + + new_nodes.push(node); + new_parents[new_id] = parent; + } + + OptimizedAst { nodes: new_nodes, + parents: new_parents, + order } + } +} + + + +/// Helper to recursively collect all descendants of a node. +fn collect_descendants(root: NodeId, parents: &[Option], + acc: &mut Vec) +{ + for (i, &parent) in parents.iter().enumerate() + { + if parent == Some(root) + { + collect_descendants(i, parents, acc); + acc.push(i); + } + } +} + +/// Recursively visits nodes in a depth-first (pre-order) manner starting from +/// `current`, building up the DFS traversal order. +/// +/// - `current`: The current node ID being visited. +/// - `parents`: A slice representing the parent relationship for each node +/// (index = child, value = optional parent). +/// - `order`: A mutable vector that will accumulate the DFS traversal order. +/// - `visited`: A mutable slice used to track which nodes have already been +/// visited. +fn visit(current: NodeId, parents: &[Option], order: &mut Vec, + visited: &mut [bool]) +{ + // Skip this node if it's already been visited + if visited[current] + { + return; + } + + // Mark the node as visited to avoid reprocessing it + visited[current] = true; + + // Add this node to the traversal order (pre-order) + order.push(current); + + // Recursively visit all child nodes (i.e., nodes that list `current` as + // their parent) + for (i, &parent) in parents.iter().enumerate() + { + if parent == Some(current) + { + visit(i, parents, order, visited); + } + } +} + +/// Computes the DFS (depth-first, pre-order) ordering of nodes based on the +/// parent table. +/// +/// Returns a Vec containing the node IDs in DFS order. +fn dfs_order(parents: &[Option]) -> Vec +{ + let mut order = Vec::new(); + let mut visited = vec![false; parents.len()]; + + // Start DFS from all root nodes (nodes with no parent). + for (i, &parent) in parents.iter().enumerate() + { + if parent.is_none() + { + visit(i, parents, &mut order, &mut visited); + } + } + + order +} + +/// Computes the BFS (breadth-first) ordering of nodes based on the parent +/// table. +/// +/// Returns a Vec containing the node IDs in BFS order. +fn bfs_order(parents: &[Option]) -> Vec +{ + let mut order = Vec::new(); + let mut visited = vec![false; parents.len()]; + let mut queue = VecDeque::new(); + + // Start BFS from all root nodes. + for (i, &parent) in parents.iter().enumerate() + { + if parent.is_none() + { + queue.push_back(i); + } + } + + while let Some(current) = queue.pop_front() + { + if visited[current] + { + continue; + } + + visited[current] = true; + order.push(current); + + for (i, &parent) in parents.iter().enumerate() + { + if parent == Some(current) + { + queue.push_back(i); + } + } + } + + order +} diff --git a/src/lib.rs b/src/lib.rs index 9313f7a..0532f24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,7 @@ pub mod library; +mod ast; mod error; mod lexer; mod position; @@ -14,6 +15,7 @@ mod token; +pub use crate::ast::*; pub use crate::error::*; pub use crate::lexer::*; pub use crate::position::*;