//! rune/examples/basic.rs
//!
//! Example: lex a Markdown file with `rune` and transform the base
//! token stream into Markdown-specific tokens.

use std::path::PathBuf;
use rune::{Lexer, Span, TokenStream, TokenType};
/// Token categories produced by the Markdown transform pass.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MarkdownTokenType
{
    /// Heading marker; payload is the heading level (number of `#`s).
    Heading(u8),
    EmphasisStart,
    EmphasisEnd,
    StrongStart,
    StrongEnd,
    CodeSpan,
    Text,
    Symbol,
    Whitespace,
    Newline,
    Unknown
}
impl std::fmt::Display for MarkdownTokenType
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
    {
        // `Heading` carries data and is formatted separately; every
        // other variant prints as its bare name.
        let name = match self
        {
            MarkdownTokenType::Heading(level) => return write!(f, "Heading({})", level),
            MarkdownTokenType::EmphasisStart => "EmphasisStart",
            MarkdownTokenType::EmphasisEnd => "EmphasisEnd",
            MarkdownTokenType::StrongStart => "StrongStart",
            MarkdownTokenType::StrongEnd => "StrongEnd",
            MarkdownTokenType::CodeSpan => "CodeSpan",
            MarkdownTokenType::Text => "Text",
            MarkdownTokenType::Symbol => "Symbol",
            MarkdownTokenType::Whitespace => "Whitespace",
            MarkdownTokenType::Newline => "Newline",
            MarkdownTokenType::Unknown => "Unknown"
        };
        f.write_str(name)
    }
}
// Define how you want to interpret base tokens
// Define how you want to interpret base tokens.
//
/// Reinterpret the lexer's generic token stream as Markdown tokens.
///
/// Rules (deliberately simplified for the example):
/// - A run of consecutive `#` symbols collapses into a single
///   `Heading(level)` token whose span covers the whole run.
/// - `**` becomes `StrongStart` and a lone `*` becomes `EmphasisStart`;
///   closing delimiters are not distinguished here.
/// - Every other token maps one-to-one onto a Markdown variant
///   (unrecognized variants become `Unknown`).
pub fn transform(input: &TokenStream<TokenType>)
-> TokenStream<MarkdownTokenType>
{
    let mut output = TokenStream::new();
    let mut i = 0;
    while i < input.len()
    {
        let token = input.get(i).unwrap(); // safe due to bounds check above
        match token.variant
        {
            TokenType::Symbol if token.lexeme == "#" =>
            {
                // Count consecutive #s for the heading level, widening
                // the span to cover the whole run.
                let mut level = 1;
                let mut span = token.span.clone();
                while let Some(next) = input.get(i + 1)
                {
                    if *next.variant != TokenType::Symbol || next.lexeme != "#"
                    {
                        break;
                    }
                    level += 1;
                    span.end = next.span.end;
                    i += 1;
                }
                output.push(token.lexeme.repeat(level),
                MarkdownTokenType::Heading(level as u8),
                span);
            }
            TokenType::Symbol if token.lexeme == "*" =>
            {
                // Look ahead: `**` is strong, a lone (or final) `*` is
                // emphasis. Matching the Option directly avoids the
                // duplicated else/out-of-bounds branches.
                match input.get(i + 1)
                {
                    Some(next) if *next.variant == TokenType::Symbol
                        && next.lexeme == "*" =>
                    {
                        output.push("**".to_string(),
                        MarkdownTokenType::StrongStart,
                        Span::merge(*token.span, *next.span));
                        i += 1; // skip the second '*'
                    }
                    _ =>
                    {
                        output.push("*".to_string(),
                        MarkdownTokenType::EmphasisStart,
                        token.span.clone());
                    }
                }
            }
            _ =>
            {
                // Everything else maps one input token to one output
                // token; only the variant differs.
                let variant = match token.variant
                {
                    TokenType::Symbol if token.lexeme == "`" => MarkdownTokenType::CodeSpan,
                    TokenType::Text => MarkdownTokenType::Text,
                    TokenType::Symbol => MarkdownTokenType::Symbol,
                    TokenType::Whitespace => MarkdownTokenType::Whitespace,
                    TokenType::Newline => MarkdownTokenType::Newline,
                    _ => MarkdownTokenType::Unknown
                };
                output.push(token.lexeme.to_string(), variant, token.span.clone());
            }
        }
        i += 1;
    }
    output
}
/// Lex `examples/example.md` (relative to the crate root), run the
/// Markdown transform over the raw tokens, and print each result.
fn main() -> Result<(), Box<dyn std::error::Error>>
{
    let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("examples/example.md");
    // The token type of the resulting stream comes from `transform`'s
    // return type.
    let tokens = Lexer::scan_file(path, transform)?;
    for token in &tokens
    {
        println!("{}", token);
    }
    Ok(())
}