[#2] A data-oriented Lexer.
I took the Token module from the Arcanum project and brought it over here, since it was a nice data-oriented way of handling tokens. I then created a Lexer that can scan a file or a string of text and lets the user transform the scanned tokens before the final token array is returned. This should allow more complex, domain-specific tokens to be built for whatever language or format is being targeted. I also added basic library examples and tests. Finally, I made sure the documentation generated nicely. This is now marked as version 0.1.0.
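As a sketch of the intended flow, here is what a caller-supplied transform could look like. It assumes only the signatures exercised by the tests in this commit (`Lexer::scan_text`, a `&TokenStream` iterated by value, `token.variant` / `token.lexeme` fields, and a `Vec<(TokenType, String)>` output); the `skip_whitespace` function itself is hypothetical and not part of the library:

use rune::*;

// Hypothetical transform: drop Whitespace tokens before the final
// array is returned, so downstream code only sees meaningful tokens.
// It walks the TokenStream the same way dummy_transform does in the
// tests below.
fn skip_whitespace(tokens: &TokenStream) -> Vec<(TokenType, String)>
{
  let mut kept = Vec::new();

  for token in tokens
  {
    // Copy the variant and lexeme through, skipping whitespace.
    if *token.variant != TokenType::Whitespace
    {
      kept.push((*token.variant, token.lexeme.to_string()));
    }
  }

  kept
}

fn main()
{
  // "magic runes" normally lexes to Text/Whitespace/Text/Newline; the
  // transform removes the Whitespace entry before the array comes back.
  let tokens = Lexer::scan_text("magic runes", skip_whitespace)
                 .expect("Lexer should succeed");

  assert_eq!(tokens,
             vec![(TokenType::Text, "magic".into()),
                  (TokenType::Text, "runes".into()),
                  (TokenType::Newline, "\n".into())]);
}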
tests/lexer_tests.rs (new file, 135 additions)
@@ -0,0 +1,135 @@
use std::fs::{self, File};
use std::io::Write;
use std::path::PathBuf;

use rune::*;


/// A named lexer input paired with the token stream it should produce.
struct TestCase<'a>
{
  name: &'a str,
  input: &'a str,
  expected: Vec<(TokenType, &'a str)>
}


/// Identity transform: copies each token's variant and lexeme into the
/// output without altering the stream.
fn dummy_transform(tokens: &TokenStream) -> Vec<(TokenType, String)>
{
  let mut new_tokens = Vec::new();

  for token in tokens
  {
    new_tokens.push((*token.variant, token.lexeme.to_string()));
  }

  new_tokens
}

/// Writes `content` to a uniquely named file in the system temp directory.
fn write_temp_file(name: &str, content: &str) -> PathBuf
{
  let mut path = std::env::temp_dir();
  path.push(format!("rune_test_{}.txt", name));
  let mut file = File::create(&path).expect("Failed to create temp file");
  write!(file, "{}", content).expect("Failed to write test content");
  path
}

/// Removes a temp file created by `write_temp_file`, ignoring errors.
fn cleanup_temp_file(path: &PathBuf)
{
  if path.exists()
  {
    let _ = fs::remove_file(path);
  }
}


#[test]
fn test_basic_lexing()
{
  let tokens =
    Lexer::scan_text("magic runes", dummy_transform).expect("Lexer should \
                                                             succeed");

  let expected = vec![(TokenType::Text, "magic".to_string()),
                      (TokenType::Whitespace, " ".to_string()),
                      (TokenType::Text, "runes".to_string()),
                      (TokenType::Newline, "\n".to_string()),];

  assert_eq!(tokens, expected);
}


#[test]
fn test_symbols_and_numbers()
{
  let tokens =
    Lexer::scan_text("13 + 37", dummy_transform).expect("Lexer should \
                                                         succeed");

  let expected = vec![(TokenType::Numeric, "13".into()),
                      (TokenType::Whitespace, " ".into()),
                      (TokenType::Symbol, "+".into()),
                      (TokenType::Whitespace, " ".into()),
                      (TokenType::Numeric, "37".into()),
                      (TokenType::Newline, "\n".into()),];

  assert_eq!(tokens, expected);
}


#[test]
fn test_lexer_with_cases()
{
  let cases = vec![TestCase { name: "simple_words",
                              input: "magic rune",
                              expected: vec![(TokenType::Text, "magic"),
                                             (TokenType::Whitespace, " "),
                                             (TokenType::Text, "rune"),
                                             (TokenType::Newline, "\n"),] },
                   TestCase { name: "symbols_and_digits",
                              input: "12 + 7",
                              expected: vec![(TokenType::Numeric, "12"),
                                             (TokenType::Whitespace, " "),
                                             (TokenType::Symbol, "+"),
                                             (TokenType::Whitespace, " "),
                                             (TokenType::Numeric, "7"),
                                             (TokenType::Newline, "\n"),] },
                   TestCase { name: "only_symbols",
                              input: "###",
                              expected: vec![(TokenType::Symbol, "#"),
                                             (TokenType::Symbol, "#"),
                                             (TokenType::Symbol, "#"),
                                             (TokenType::Newline, "\n"),] },
                   TestCase { name: "whitespace_and_text",
                              input: "   spell",
                              expected: vec![(TokenType::Whitespace, " "),
                                             (TokenType::Whitespace, " "),
                                             (TokenType::Whitespace, " "),
                                             (TokenType::Text, "spell"),
                                             (TokenType::Newline, "\n"),] },];

  for case in cases
  {
    // Round-trip each case through a real file to exercise scan_file.
    let path = write_temp_file(case.name, case.input);
    let result =
      Lexer::scan_file(&path, dummy_transform).expect(&format!("Lexer failed \
                                                                on case '{}'",
                                                               case.name));

    let expected = case.expected
                       .iter()
                       .map(|(ty, s)| (*ty, s.to_string()))
                       .collect::<Vec<_>>();

    assert_eq!(result, expected,
               "Mismatch in test case '{}'",
               case.name);

    cleanup_temp_file(&path);
  }
}