//! rune/examples/basic.rs
//!
//! Example: lex a Markdown file with `rune` and transform the base
//! token stream into Markdown-specific tokens.

use std::path::PathBuf;
use rune::{Lexer, Span, TokenStream, TokenType};
/// Token categories produced by the Markdown transform pass.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MarkdownTokenType
{
    /// Heading marker; payload is the heading level (number of `#`s).
    Heading(u8),
    EmphasisStart,
    EmphasisEnd,
    StrongStart,
    StrongEnd,
    CodeSpan,
    Text,
    Symbol,
    Whitespace,
    Newline,
    Unknown
}
impl std::fmt::Display for MarkdownTokenType
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
    {
        // `Heading` carries data and is formatted separately; every
        // other variant prints as its bare name.
        let name = match self
        {
            MarkdownTokenType::Heading(level) => return write!(f, "Heading({})", level),
            MarkdownTokenType::EmphasisStart => "EmphasisStart",
            MarkdownTokenType::EmphasisEnd => "EmphasisEnd",
            MarkdownTokenType::StrongStart => "StrongStart",
            MarkdownTokenType::StrongEnd => "StrongEnd",
            MarkdownTokenType::CodeSpan => "CodeSpan",
            MarkdownTokenType::Text => "Text",
            MarkdownTokenType::Symbol => "Symbol",
            MarkdownTokenType::Whitespace => "Whitespace",
            MarkdownTokenType::Newline => "Newline",
            MarkdownTokenType::Unknown => "Unknown"
        };
        f.write_str(name)
    }
}
// Define how you want to interpret base tokens
// Define how you want to interpret base tokens.
//
/// Reinterpret the lexer's generic token stream as Markdown tokens.
///
/// Rules (deliberately simplified for the example):
/// - A run of consecutive `#` symbols collapses into a single
///   `Heading(level)` token whose span covers the whole run.
/// - `**` becomes `StrongStart` and a lone `*` becomes `EmphasisStart`;
///   closing delimiters are not distinguished here.
/// - Every other token maps one-to-one onto a Markdown variant
///   (unrecognized variants become `Unknown`).
pub fn transform(input: &TokenStream<TokenType>)
-> TokenStream<MarkdownTokenType>
{
    let mut output = TokenStream::new();
    let mut i = 0;
    while i < input.len()
    {
        let token = input.get(i).unwrap(); // safe due to bounds check above
        match token.variant
        {
            TokenType::Symbol if token.lexeme == "#" =>
            {
                // Count consecutive #s for the heading level, widening
                // the span to cover the whole run.
                let mut level = 1;
                let mut span = token.span.clone();
                while let Some(next) = input.get(i + 1)
                {
                    if *next.variant != TokenType::Symbol || next.lexeme != "#"
                    {
                        break;
                    }
                    level += 1;
                    span.end = next.span.end;
                    i += 1;
                }
                output.push(token.lexeme.repeat(level),
                MarkdownTokenType::Heading(level as u8),
                span);
            }
            TokenType::Symbol if token.lexeme == "*" =>
            {
                // Look ahead: `**` is strong, a lone (or final) `*` is
                // emphasis. Matching the Option directly avoids the
                // duplicated else/out-of-bounds branches.
                match input.get(i + 1)
                {
                    Some(next) if *next.variant == TokenType::Symbol
                        && next.lexeme == "*" =>
                    {
                        output.push("**".to_string(),
                        MarkdownTokenType::StrongStart,
                        Span::merge(*token.span, *next.span));
                        i += 1; // skip the second '*'
                    }
                    _ =>
                    {
                        output.push("*".to_string(),
                        MarkdownTokenType::EmphasisStart,
                        token.span.clone());
                    }
                }
            }
            _ =>
            {
                // Everything else maps one input token to one output
                // token; only the variant differs.
                let variant = match token.variant
                {
                    TokenType::Symbol if token.lexeme == "`" => MarkdownTokenType::CodeSpan,
                    TokenType::Text => MarkdownTokenType::Text,
                    TokenType::Symbol => MarkdownTokenType::Symbol,
                    TokenType::Whitespace => MarkdownTokenType::Whitespace,
                    TokenType::Newline => MarkdownTokenType::Newline,
                    _ => MarkdownTokenType::Unknown
                };
                output.push(token.lexeme.to_string(), variant, token.span.clone());
            }
        }
        i += 1;
    }
    output
}
/// Lex `examples/example.md` (relative to the crate root), run the
/// Markdown transform over the raw tokens, and print each result.
fn main() -> Result<(), Box<dyn std::error::Error>>
{
    let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("examples/example.md");
    // The token type of the resulting stream comes from `transform`'s
    // return type.
    let tokens = Lexer::scan_file(path, transform)?;
    for token in &tokens
    {
        println!("{}", token);
    }
    Ok(())
}