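//! Example: lex a Markdown file with `rune` and reinterpret the raw token
//! stream as Markdown-specific tokens.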
use std::path::PathBuf;
use rune::{Lexer, Span, TokenStream, TokenType};
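
/// Markdown-specific token kinds produced by the `transform` pass below.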
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MarkdownTokenType {
    Heading(u8),
    EmphasisStart,
    EmphasisEnd,
    StrongStart,
    StrongEnd,
    CodeSpan,
    Text,
    Symbol,
    Whitespace,
    Newline,
    Unknown,
}

impl std::fmt::Display for MarkdownTokenType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MarkdownTokenType::Heading(level) => write!(f, "Heading({})", level),
            MarkdownTokenType::EmphasisStart => write!(f, "EmphasisStart"),
            MarkdownTokenType::EmphasisEnd => write!(f, "EmphasisEnd"),
            MarkdownTokenType::StrongStart => write!(f, "StrongStart"),
            MarkdownTokenType::StrongEnd => write!(f, "StrongEnd"),
            MarkdownTokenType::CodeSpan => write!(f, "CodeSpan"),
            MarkdownTokenType::Text => write!(f, "Text"),
            MarkdownTokenType::Symbol => write!(f, "Symbol"),
            MarkdownTokenType::Whitespace => write!(f, "Whitespace"),
            MarkdownTokenType::Newline => write!(f, "Newline"),
            MarkdownTokenType::Unknown => write!(f, "Unknown"),
        }
    }
}

/// Defines how the base tokens from the lexer are reinterpreted as Markdown tokens.
pub fn transform(input: &TokenStream<TokenType>) -> TokenStream<MarkdownTokenType> {
    let mut output = TokenStream::new();

    let mut i = 0;
    while i < input.len() {
        let token = input.get(i).unwrap(); // safe due to bounds check above

        match token.variant {
            TokenType::Symbol if token.lexeme == "#" => {
                // Count consecutive #s for heading level
                let mut level = 1;
                let mut span = token.span.clone();

                while i + 1 < input.len() {
                    let next = input.get(i + 1).unwrap();
                    if *next.variant == TokenType::Symbol && next.lexeme == "#" {
                        level += 1;
                        span.end = next.span.end;
                        i += 1;
                    } else {
                        break;
                    }
                }

                output.push(
                    token.lexeme.repeat(level),
                    MarkdownTokenType::Heading(level as u8),
                    span,
                );
            }

            TokenType::Symbol if token.lexeme == "*" => {
                // Look ahead to see if it's strong (**) or emphasis (*)
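                // Note: only the *Start variants are emitted here; pairing them with
                // EmphasisEnd / StrongEnd would require tracking open markers, which
                // this pass does not do.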
                if i + 1 < input.len() {
                    let next = input.get(i + 1).unwrap();
                    if *next.variant == TokenType::Symbol && next.lexeme == "*" {
                        output.push(
                            "**".to_string(),
                            MarkdownTokenType::StrongStart,
                            Span::merge(*token.span, *next.span),
                        );
                        i += 1; // skip the second '*'
                    } else {
                        output.push(
                            "*".to_string(),
                            MarkdownTokenType::EmphasisStart,
                            token.span.clone(),
                        );
                    }
                } else {
                    output.push(
                        "*".to_string(),
                        MarkdownTokenType::EmphasisStart,
                        token.span.clone(),
                    );
                }
            }

            TokenType::Symbol if token.lexeme == "`" => {
                output.push(
                    token.lexeme.to_string(),
                    MarkdownTokenType::CodeSpan,
                    token.span.clone(),
                );
            }

            TokenType::Text => {
                output.push(
                    token.lexeme.to_string(),
                    MarkdownTokenType::Text,
                    token.span.clone(),
                );
            }

            TokenType::Symbol => {
                output.push(
                    token.lexeme.to_string(),
                    MarkdownTokenType::Symbol,
                    token.span.clone(),
                );
            }

            TokenType::Whitespace => {
                output.push(
                    token.lexeme.to_string(),
                    MarkdownTokenType::Whitespace,
                    token.span.clone(),
                );
            }

            TokenType::Newline => {
                output.push(
                    token.lexeme.to_string(),
                    MarkdownTokenType::Newline,
                    token.span.clone(),
                );
            }

            _ => {
                output.push(
                    token.lexeme.to_string(),
                    MarkdownTokenType::Unknown,
                    token.span.clone(),
                );
            }
        }
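
        // Advance to the next token; the heading and strong branches above have
        // already bumped `i` past any extra tokens they consumed.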
        i += 1;
    }

    output
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    path.push("examples/example.md");
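
    // Scan the example file and run its base token stream through `transform`.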
    let tokens = Lexer::scan_file(path, transform)?;

    // Each token printed below carries the MarkdownTokenType produced by `transform`.
    for token in &tokens {
        println!("{}", token);
    }

    Ok(())
}