use std::path::PathBuf; use rune::{Lexer, Span, TokenStream, TokenType}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum MarkdownTokenType { Heading(u8), EmphasisStart, EmphasisEnd, StrongStart, StrongEnd, CodeSpan, Text, Symbol, Whitespace, Newline, Unknown, } impl std::fmt::Display for MarkdownTokenType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { MarkdownTokenType::Heading(level) => write!(f, "Heading({})", level), MarkdownTokenType::EmphasisStart => write!(f, "EmphasisStart"), MarkdownTokenType::EmphasisEnd => write!(f, "EmphasisEnd"), MarkdownTokenType::StrongStart => write!(f, "StrongStart"), MarkdownTokenType::StrongEnd => write!(f, "StrongEnd"), MarkdownTokenType::CodeSpan => write!(f, "CodeSpan"), MarkdownTokenType::Text => write!(f, "Text"), MarkdownTokenType::Symbol => write!(f, "Symbol"), MarkdownTokenType::Whitespace => write!(f, "Whitespace"), MarkdownTokenType::Newline => write!(f, "Newline"), MarkdownTokenType::Unknown => write!(f, "Unknown"), } } } // Define how you want to interpret base tokens pub fn transform(input: &TokenStream) -> TokenStream { let mut output = TokenStream::new(); let mut i = 0; while i < input.len() { let token = input.get(i).unwrap(); // safe due to bounds check above match token.variant { TokenType::Symbol if token.lexeme == "#" => { // Count consecutive #s for heading level let mut level = 1; let mut span = token.span.clone(); while i + 1 < input.len() { let next = input.get(i + 1).unwrap(); if *next.variant == TokenType::Symbol && next.lexeme == "#" { level += 1; span.end = next.span.end; i += 1; } else { break; } } output.push(token.lexeme.repeat(level), MarkdownTokenType::Heading(level as u8), span); } TokenType::Symbol if token.lexeme == "*" => { // Look ahead to see if it's strong (**) or emphasis (*) if i + 1 < input.len() { let next = input.get(i + 1).unwrap(); if *next.variant == TokenType::Symbol && next.lexeme == "*" { output.push("**".to_string(), MarkdownTokenType::StrongStart, Span::merge(*token.span, *next.span)); i += 1; // skip the second '*' } else { output.push("*".to_string(), MarkdownTokenType::EmphasisStart, token.span.clone()); } } else { output.push("*".to_string(), MarkdownTokenType::EmphasisStart, token.span.clone()); } } TokenType::Symbol if token.lexeme == "`" => { output.push(token.lexeme.to_string(), MarkdownTokenType::CodeSpan, token.span.clone()); } TokenType::Text => { output.push(token.lexeme.to_string(), MarkdownTokenType::Text, token.span.clone()); } TokenType::Symbol => { output.push(token.lexeme.to_string(), MarkdownTokenType::Symbol, token.span.clone()); } TokenType::Whitespace => { output.push(token.lexeme.to_string(), MarkdownTokenType::Whitespace, token.span.clone()); } TokenType::Newline => { output.push(token.lexeme.to_string(), MarkdownTokenType::Newline, token.span.clone()); } _ => { output.push(token.lexeme.to_string(), MarkdownTokenType::Unknown, token.span.clone()); } } i += 1; } output } fn main() -> Result<(), Box> { let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); path.push("examples/example.md"); let tokens = Lexer::scan_file(path, transform)?; // The tuple here is from the transform functions return type. for token in &tokens { println!("{}", token); } Ok(()) }