[#3] TokenStream now holds generic variants.
This makes the TokenStream and all its associated Token types generic over the Token's variant type. Span was also given the ability to merge with another span, which will make it easier to track spans as users group TokenTypes together to build their domain-specific types. All tests and examples were updated for this change. The version was incremented to 0.2.0.
This commit is contained in:
25
src/lexer.rs
25
src/lexer.rs
@ -31,24 +31,17 @@ const BUFFER_SIZE: usize = 1024 * 1024;
|
||||
/// ```rust
|
||||
/// use rune::{Lexer, TokenStream, TokenType};
|
||||
///
|
||||
/// fn transform(tokens: &TokenStream) -> Vec<(TokenType, String)>
|
||||
/// fn transform(tokens: &TokenStream<TokenType>) -> TokenStream<TokenType>
|
||||
/// {
|
||||
/// let mut new_tokens = Vec::new();
|
||||
///
|
||||
/// for token in tokens
|
||||
/// {
|
||||
/// new_tokens.push((*token.variant, token.lexeme.to_string()));
|
||||
/// }
|
||||
///
|
||||
/// new_tokens
|
||||
/// tokens.clone()
|
||||
/// }
|
||||
///
|
||||
/// let tokens = Lexer::scan_text("Runes += 42", transform).unwrap();
|
||||
///
|
||||
/// // The tuple here is from the transform functions return type.
|
||||
/// for (ty, lexeme) in tokens
|
||||
/// for token in &tokens
|
||||
/// {
|
||||
/// println!("{:?}: {:?}", ty, lexeme);
|
||||
/// println!("{}", token);
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
@ -72,9 +65,9 @@ impl Lexer
|
||||
{
|
||||
/// Scans a file and produces a vector of transformed tokens.
|
||||
pub fn scan_file<P, F, T>(path: P, transform: F)
|
||||
-> Result<Vec<T>, LexerError>
|
||||
-> Result<TokenStream<T>, LexerError>
|
||||
where P: AsRef<std::path::Path>,
|
||||
F: FnOnce(&TokenStream) -> Vec<T>
|
||||
F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
|
||||
{
|
||||
let mut cursor = Position::default();
|
||||
let mut stream = TokenStream::new();
|
||||
@ -123,8 +116,8 @@ impl Lexer
|
||||
|
||||
/// Scans a full in-memory string and returns transformed tokens.
|
||||
pub fn scan_text<F, T>(text: &str, transform: F)
|
||||
-> Result<Vec<T>, LexerError>
|
||||
where F: FnOnce(&TokenStream) -> Vec<T>
|
||||
-> Result<TokenStream<T>, LexerError>
|
||||
where F: FnOnce(&TokenStream<TokenType>) -> TokenStream<T>
|
||||
{
|
||||
let mut cursor = Position::default();
|
||||
let mut stream = TokenStream::new();
|
||||
@ -145,7 +138,7 @@ impl Lexer
|
||||
}
|
||||
|
||||
/// Internal: scans a single line of text into tokens.
|
||||
fn scan(line: &str, stream: &mut TokenStream, cursor: &mut Position)
|
||||
fn scan(line: &str, stream: &mut TokenStream<TokenType>, cursor: &mut Position)
|
||||
{
|
||||
for c in line.chars()
|
||||
{
|
||||
|
@ -61,6 +61,19 @@ impl Span
|
||||
Span { start: val,
|
||||
end: val }
|
||||
}
|
||||
|
||||
pub fn merge(a: Span, b: Span) -> Self
|
||||
{
|
||||
Span { start: a.start,
|
||||
end: b.end }
|
||||
}
|
||||
|
||||
pub fn merge_with(&self, other: Span) -> Span {
|
||||
Span {
|
||||
start: self.start,
|
||||
end: other.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Span
|
||||
|
54
src/token.rs
54
src/token.rs
@ -51,13 +51,13 @@ pub enum TokenType
|
||||
/// Everything is in flat arrays for fast access
|
||||
/// and minimal cache misses.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct TokenStream
|
||||
pub struct TokenStream<T>
|
||||
{
|
||||
/// The text of the `Token`.
|
||||
pub lexemes: Vec<String>,
|
||||
|
||||
/// The type of `Token`.
|
||||
pub variants: Vec<TokenType>,
|
||||
pub variants: Vec<T>,
|
||||
|
||||
/// The location of the `Token` in the file.
|
||||
pub locations: Vec<Span>
|
||||
@ -66,13 +66,13 @@ pub struct TokenStream
|
||||
|
||||
/// A `Token` found during the lexigraphical scan.
|
||||
#[derive(Debug)]
|
||||
pub struct Token<'a>
|
||||
pub struct Token<'a, T>
|
||||
{
|
||||
/// The characters of the `Token`.
|
||||
pub lexeme: &'a str,
|
||||
|
||||
/// The `Token`'s type.
|
||||
pub variant: &'a TokenType,
|
||||
pub variant: &'a T,
|
||||
|
||||
/// The location in the file of this `Token`.
|
||||
pub span: &'a Span
|
||||
@ -82,35 +82,35 @@ pub struct Token<'a>
|
||||
///
|
||||
/// This is the mutable reference.
|
||||
#[derive(Debug)]
|
||||
pub struct TokenMut<'a>
|
||||
pub struct TokenMut<'a, T>
|
||||
{
|
||||
/// The characters of the `Token`.
|
||||
pub lexeme: &'a mut str,
|
||||
/// The `Token`'s type.
|
||||
pub variant: &'a mut TokenType,
|
||||
pub variant: &'a mut T,
|
||||
/// The location for this `Token` in the file.
|
||||
pub span: &'a mut Span
|
||||
}
|
||||
|
||||
|
||||
/// An iterator over the Tokens in a `TokenStream`.
|
||||
pub struct TokenStreamIter<'a>
|
||||
pub struct TokenStreamIter<'a, T>
|
||||
{
|
||||
/// The stream to iterate over.
|
||||
stream: &'a TokenStream,
|
||||
stream: &'a TokenStream<T>,
|
||||
|
||||
/// The position in the stream.
|
||||
index: usize
|
||||
}
|
||||
|
||||
/// A mutable iterator over the Tokens in a `TokenStream`.
|
||||
pub struct TokenStreamIterMut<'a>
|
||||
pub struct TokenStreamIterMut<'a, T>
|
||||
{
|
||||
/// The characters of the `Token`.
|
||||
lexemes: std::slice::IterMut<'a, String>,
|
||||
|
||||
/// The `Token`'s type.
|
||||
variants: std::slice::IterMut<'a, TokenType>,
|
||||
variants: std::slice::IterMut<'a, T>,
|
||||
|
||||
/// The location for this `Token` in the file.
|
||||
locations: std::slice::IterMut<'a, Span>
|
||||
@ -118,7 +118,7 @@ pub struct TokenStreamIterMut<'a>
|
||||
|
||||
|
||||
|
||||
impl TokenStream
|
||||
impl<T> TokenStream<T>
|
||||
{
|
||||
pub fn new() -> Self
|
||||
{
|
||||
@ -137,7 +137,7 @@ impl TokenStream
|
||||
self.lexemes.is_empty()
|
||||
}
|
||||
|
||||
pub fn get(&self, index: usize) -> Option<Token<'_>>
|
||||
pub fn get(&self, index: usize) -> Option<Token<'_, T>>
|
||||
{
|
||||
if index < self.lexemes.len()
|
||||
{
|
||||
@ -151,20 +151,20 @@ impl TokenStream
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> TokenStreamIter<'_>
|
||||
pub fn iter(&self) -> TokenStreamIter<'_, T>
|
||||
{
|
||||
TokenStreamIter { stream: self,
|
||||
index: 0 }
|
||||
}
|
||||
|
||||
pub fn get_mut(&mut self, index: usize) -> Option<TokenMut<'_>>
|
||||
pub fn get_mut(&mut self, index: usize) -> Option<TokenMut<'_, T>>
|
||||
{
|
||||
if index < self.lexemes.len()
|
||||
{
|
||||
// SAFETY: We manually split the borrows to avoid
|
||||
// double mutable borrow.
|
||||
let lexeme = &mut self.lexemes[index] as *mut String;
|
||||
let variant = &mut self.variants[index] as *mut TokenType;
|
||||
let variant = &mut self.variants[index] as *mut T;
|
||||
let span = &mut self.locations[index] as *mut Span;
|
||||
|
||||
// Convert &mut String to &mut str safely.
|
||||
@ -189,14 +189,14 @@ impl TokenStream
|
||||
self.locations.clear();
|
||||
}
|
||||
|
||||
pub fn push(&mut self, lexeme: String, variant: TokenType, span: Span)
|
||||
pub fn push(&mut self, lexeme: String, variant: T, span: Span)
|
||||
{
|
||||
self.lexemes.push(lexeme);
|
||||
self.variants.push(variant);
|
||||
self.locations.push(span);
|
||||
}
|
||||
|
||||
pub fn iter_mut(&mut self) -> TokenStreamIterMut<'_>
|
||||
pub fn iter_mut(&mut self) -> TokenStreamIterMut<'_, T>
|
||||
{
|
||||
TokenStreamIterMut { lexemes: self.lexemes.iter_mut(),
|
||||
variants: self.variants.iter_mut(),
|
||||
@ -205,10 +205,10 @@ impl TokenStream
|
||||
}
|
||||
|
||||
|
||||
impl<'a> IntoIterator for &'a TokenStream
|
||||
impl<'a, T> IntoIterator for &'a TokenStream<T>
|
||||
{
|
||||
type IntoIter = TokenStreamIter<'a>;
|
||||
type Item = Token<'a>;
|
||||
type IntoIter = TokenStreamIter<'a, T>;
|
||||
type Item = Token<'a, T>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter
|
||||
{
|
||||
@ -217,9 +217,9 @@ impl<'a> IntoIterator for &'a TokenStream
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TokenStreamIter<'a>
|
||||
impl<'a, T> Iterator for TokenStreamIter<'a, T>
|
||||
{
|
||||
type Item = Token<'a>;
|
||||
type Item = Token<'a, T>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item>
|
||||
{
|
||||
@ -240,9 +240,9 @@ impl<'a> Iterator for TokenStreamIter<'a>
|
||||
}
|
||||
|
||||
|
||||
impl<'a> Iterator for TokenStreamIterMut<'a>
|
||||
impl<'a, T> Iterator for TokenStreamIterMut<'a, T>
|
||||
{
|
||||
type Item = TokenMut<'a>;
|
||||
type Item = TokenMut<'a, T>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item>
|
||||
{
|
||||
@ -257,13 +257,13 @@ impl<'a> Iterator for TokenStreamIterMut<'a>
|
||||
}
|
||||
|
||||
|
||||
impl<'a> ::std::fmt::Display for Token<'a>
|
||||
impl<'a, T: std::fmt::Display> ::std::fmt::Display for Token<'a, T>
|
||||
{
|
||||
fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result
|
||||
{
|
||||
match *self.variant
|
||||
match self.lexeme
|
||||
{
|
||||
TokenType::Newline => write!(f, "[{}, {}]", self.variant, "\\n"),
|
||||
"\n" => write!(f, "[{}, {}]", self.variant, "\\n"),
|
||||
|
||||
_ => write!(f, "[{}: {}]", self.variant, self.lexeme)
|
||||
}
|
||||
|
Reference in New Issue
Block a user