The compiler can currently handle the include keyword.

The Lexer is skipped for now; the parser can find and replace an include statement
as long as it is on a line by itself.
This commit is contained in:
Myrddin Dundragon 2016-07-26 18:30:47 -04:00
parent 5103ed2971
commit 99a816e2a5
11 changed files with 887 additions and 111 deletions

View File

@ -25,9 +25,20 @@ pub const TEST_OUTPUT_FILENAME: &'static str = "test.rs";
///
pub fn main()
{
let mut compiler: Compiler;
let mut include: PathBuf;
let mut input: PathBuf;
let mut output: PathBuf;
// Create a new compiler.
compiler = Compiler::new();
// Add the resource directory as an include directory
// for the compiler.
include = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
include.push(RESOURCE_DIR);
compiler.register_include_dir(&include);
// Get the input file to test with. It is in the
// resources directory.
input = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
@ -41,5 +52,5 @@ pub fn main()
output.push(TEST_OUTPUT_FILENAME);
println!("Compiling {:?} to {:?}", input, output);
Compiler::compile(input, output);
compiler.compile(input, output);
}

View File

@ -2,8 +2,11 @@ use std::fs::File;
use std::io::Write;
use std::path::Path;
use super::lexer::Lexer;
use super::reader::Reader;
use ::lexer::Lexer;
use ::parser::Parser;
use ::reader::Reader;
use ::util::Util;
const FILE_HEADER: &'static str =
@ -14,41 +17,39 @@ const FILE_HEADER: &'static str =
///
pub enum Compiler
pub struct Compiler
{
util: Util
}
/// Reads an input File and parses it.
fn read_file(input_path: &Path) -> String
pub fn read_file(util: &Util, input_path: &Path) -> String
{
let mut output: String;
let mut reader: Reader;
let mut lines: Vec<String>;
output = String::from(FILE_HEADER);
// Create the output string of the compiled file.
output = String::new();
// Create a Reader from the given input file.
reader = Reader::from_file(input_path);
// Use the Lexer to scan the Reader's
// buffer into tokens.
match Lexer::scan(reader)
{
Ok(tokens) =>
{
for token in tokens
{
output.push_str(&token);
}
}
Lexer::scan(&mut reader);
Err(error) =>
{
error!("{}", error);
}
}
/*
// Parse the file and turn it into a set of compiled lines.
lines = Parser::parse(&util, &mut reader);
// Add all these lines to the final output.
for line in lines.into_iter()
{
output.push_str(&line);
}
*/
// Return the final output.
output
}
@ -90,14 +91,42 @@ fn write_file(output_path: &Path, output: String)
impl Compiler
{
/// Compile
pub fn compile<F>(input: F, output: F)
/// Create a new Compiler to use.
///
/// The Compiler starts with a fresh `Util`, i.e. with no include
/// directories registered yet.
pub fn new() -> Compiler
{
    Compiler
    {
        util: Util::new()
    }
}
/// Register an include directory.
///
/// The directory is forwarded to the internal `Util`, which searches it
/// later when resolving include files.
pub fn register_include_dir(&mut self, include_dir: &Path)
{
    self.util.register_include_dir(include_dir);
}
/// Register a list of include directories.
///
/// Each directory is registered, in the order given, through
/// `register_include_dir`.
pub fn register_include_dirs(&mut self, include_dirs: Vec<&Path>)
{
    // Delegate every entry to the single-directory registration.
    for dir in include_dirs
    {
        self.register_include_dir(dir);
    }
}
/// Compile a given input file.
pub fn compile<F>(&self, input: F, output: F)
where F: AsRef<Path>
{
let output_string: String;
let mut output_string: String;
// Create the output string of the compiled file.
// Start it with the compilation header.
output_string = String::from(FILE_HEADER);
// Turn the input file into a compiled String.
output_string = read_file(input.as_ref());
output_string.push_str(&read_file(&self.util, input.as_ref()));
// Write the compiled output file.
write_file(output.as_ref(), output_string);

View File

@ -1,66 +0,0 @@
use super::reader::Reader;
/// Namespace-only type for the lexing routines.
///
/// The enum has no variants, so it can never be instantiated; it exists
/// only to hold the associated `scan` function (see `impl Lexer`).
pub enum Lexer
{
}
/// Determines if a character is alphabetic.
fn is_alpha(c: char) -> bool
{
    // Defer straight to the standard library's Unicode-aware test.
    char::is_alphabetic(c)
}
/// Determines if a character is a numeral.
fn is_numeric(c: char) -> bool
{
    // Defer straight to the standard library's Unicode-aware test.
    char::is_numeric(c)
}
/// Determines if a character is whitespace.
fn is_whitespace(c: char) -> bool
{
    // Defer straight to the standard library's Unicode-aware test.
    char::is_whitespace(c)
}
/// Returns true if the next set of characters starts with
/// the given pattern; Otherwise, false.
///
/// NOTE(review): unimplemented stub — `pattern` is ignored and false is
/// always returned; there is no input source wired in yet.
fn starts_with<P>(pattern: P) -> bool
    where P: AsRef<str>
{
    false
}
/// Consume and discard zero or more whitespace characters.
///
/// Delegates to `consume_while` and drops the returned String.
fn consume_whitespace()
{
    consume_while(is_whitespace);
}
/// Consume characters until the test returns false.
///
/// NOTE(review): unimplemented stub — `test` is never invoked and an
/// empty String is always returned.
fn consume_while<F>(test: F) -> String
    where F: Fn(char) -> bool
{
    String::new()
}
impl Lexer
{
    /// Scan the data held by the Reader. This will consume the Reader and
    /// clear it.
    ///
    /// NOTE(review): stub — the Reader is moved in and dropped but never
    /// read; this always succeeds with an empty token list.
    pub fn scan(reader: Reader) -> Result<Vec<String>, String>
    {
        let tokens: Vec<String>;
        // Create a new list to hold all the discovered tokens.
        tokens = Vec::new();
        // Return the scanned tokens.
        Ok(tokens)
    }
}

377
src/lexer/lexer.rs Normal file
View File

@ -0,0 +1,377 @@
use ::reader::Reader;
use ::lexer::token::Token;
use ::lexer::token_types::TokenTypes;
/// The keywords for this language.
///
/// NOTE(review): Rust convention is SCREAMING_SNAKE_CASE for constants
/// (`KEYWORDS`); renaming would also require updating `is_keyword`.
const keywords: [&'static str; 1] = ["include"];
/// Performs lexical analysis on
/// the text given from a Reader.
///
/// Variant-less enum: it cannot be instantiated and serves only as a
/// namespace for the associated `scan` function.
pub enum Lexer
{
}
/// Determines if a character is alphabetic.
fn is_alpha(c: char) -> bool
{
    // Defer straight to the standard library's Unicode-aware test.
    char::is_alphabetic(c)
}
/// Determines if a character is a numeral.
fn is_numeric(c: char) -> bool
{
    // Defer straight to the standard library's Unicode-aware test.
    char::is_numeric(c)
}
/// Determines if a character is alphabetic or numeric.
fn is_alphanumeric(c: char) -> bool
{
    // `char::is_alphanumeric` is defined as "alphabetic or numeric",
    // exactly what the original `is_alpha(c) | is_numeric(c)` computed,
    // but short-circuiting and without the bitwise `|` on bools.
    // (The original doc comment was a copy-paste of is_numeric's.)
    c.is_alphanumeric()
}
/// Determines if a character is whitespace.
fn is_whitespace(c: char) -> bool
{
    // Defer straight to the standard library's Unicode-aware test.
    char::is_whitespace(c)
}
/// Determines if a character is anything other than whitespace.
///
/// (The original lacked a doc comment, unlike every sibling predicate.)
fn is_non_whitespace(c: char) -> bool
{
    !c.is_whitespace()
}
/// Determines if a character is a symbol.
///
/// A symbol is any character that is neither whitespace nor
/// alphanumeric (e.g. punctuation such as `"` or `;`).
fn is_symbol(c: char) -> bool
{
    // Direct negation replaces the original `== false` comparisons.
    !c.is_whitespace() && !c.is_alphanumeric()
}
/// Determines if a String is a keyword.
///
/// Performs a linear scan of the keyword table, which is fine for the
/// handful of keywords in the language.
fn is_keyword(s: String) -> bool
{
    // `any` short-circuits on the first match, replacing the manual
    // loop with an explicit early `return true`.
    keywords.iter().any(|&keyword| keyword == s)
}
/// Returns true if the next set of characters starts with
/// the given pattern; Otherwise, false.
///
/// NOTE(review): unimplemented stub — `pattern` is ignored and false is
/// always returned; no caller is visible in this file.
fn starts_with<P>(pattern: P) -> bool
    where P: AsRef<str>
{
    false
}
/// Consume a run of zero or more whitespace characters from the Reader
/// and return it. (`scan` stores the run in a Whitespace token rather
/// than discarding it.)
fn consume_whitespace(reader: &mut Reader) -> String
{
    consume_while(reader, is_whitespace)
}
/// Consume a single symbol character from the Reader.
///
/// If the next character is a symbol (see `is_symbol`), it is consumed
/// and returned as a one-character String; otherwise nothing is
/// consumed and the returned String is empty. Read errors are logged
/// and treated as "not a symbol".
fn consume_symbol(reader: &mut Reader) -> String
{
    let mut is_valid: bool;
    let mut buffer: String;
    // Create a buffer to store all the consumed characters.
    buffer = String::new();
    // Check if the first character from the Reader
    // is a symbol character.
    match reader.get_char()
    {
        Ok(read_character) =>
        {
            is_valid = is_symbol(read_character);
        }
        Err(error) =>
        {
            warn!("{}", error);
            is_valid = false;
        }
    }
    if is_valid == true
    {
        // Consume this character that passed.
        match reader.consume_char()
        {
            Ok(character) =>
            {
                // Add this consumed character to the buffer.
                buffer.push(character);
            }
            Err(error) =>
            {
                error!("{}", error);
            }
        }
    }
    // Return the buffer of consumed characters.
    buffer
}
/// Consume a numeral token from the Reader.
///
/// If the next character is numeric, it is consumed along with every
/// following non-whitespace character; otherwise nothing is consumed
/// and the returned String is empty. Read errors are logged.
///
/// NOTE(review): the trailing run uses `is_non_whitespace`, not
/// `is_numeric`, so letters and symbols glued to the number (e.g.
/// "12ab;") become part of the numeral token — confirm this is the
/// intended handling of suffixed literals.
fn consume_numeral(reader: &mut Reader) -> String
{
    let mut is_valid: bool;
    let mut buffer: String;
    // Create a buffer to store all the consumed characters.
    buffer = String::new();
    // Check if the first character from the Reader
    // is a numeric character.
    match reader.get_char()
    {
        Ok(read_character) =>
        {
            is_valid = is_numeric(read_character);
        }
        Err(error) =>
        {
            warn!("{}", error);
            is_valid = false;
        }
    }
    // If the first character is valid, then consume it and
    // consume any numeric characters that follow.
    if is_valid == true
    {
        // Consume this character that passed.
        match reader.consume_char()
        {
            Ok(character) =>
            {
                // Add this consumed character to the buffer.
                buffer.push(character);
            }
            Err(error) =>
            {
                error!("{}", error);
            }
        }
        // Now read any numeric characters that follow.
        buffer.push_str(&consume_while(reader, is_non_whitespace));
    }
    // Return the buffer of consumed characters.
    buffer
}
/// Consume an identifier from the Reader.
///
/// If the next character is alphabetic, it is consumed along with any
/// alphanumeric characters that follow; otherwise nothing is consumed
/// and the returned String is empty. Read errors are logged.
fn consume_identifier(reader: &mut Reader) -> String
{
    let mut is_valid: bool;
    let mut buffer: String;
    // Create a buffer to store all the consumed characters.
    buffer = String::new();
    // Check if the first character from the Reader
    // is an alpha character.
    match reader.get_char()
    {
        Ok(read_character) =>
        {
            is_valid = is_alpha(read_character);
        }
        Err(error) =>
        {
            warn!("{}", error);
            is_valid = false;
        }
    }
    // If the first character is valid, then consume it and
    // consume any alphanumeric character the follow.
    if is_valid == true
    {
        // Consume this character that passed.
        match reader.consume_char()
        {
            Ok(character) =>
            {
                // Add this consumed character to the buffer.
                buffer.push(character);
            }
            Err(error) =>
            {
                error!("{}", error);
            }
        }
        // Now read any alphanumeric characters that follow.
        buffer.push_str(&consume_while(reader, is_alphanumeric));
    }
    // Return the buffer of consumed characters.
    buffer
}
/// Consume characters until the test returns false.
///
/// Peeks with `get_char` and only consumes while `test` passes, so the
/// first failing character is left unread in the Reader. Read errors
/// (e.g. reaching the end of the buffer) are logged and end the loop.
fn consume_while<F>(reader: &mut Reader, test: F) -> String
    where F: Fn(char) -> bool
{
    let mut is_valid: bool;
    let mut buffer: String;
    // Create a buffer to store all the consumed characters.
    buffer = String::new();
    // Get a character from the reader.
    match reader.get_char()
    {
        Ok(read_character) =>
        {
            is_valid = test(read_character);
        }
        Err(error) =>
        {
            warn!("{}", error);
            is_valid = false;
        }
    }
    // Loop until we get to invalid input.
    while is_valid == true
    {
        // Consume this character that passed.
        match reader.consume_char()
        {
            Ok(character) =>
            {
                // Add this consumed character to the buffer.
                buffer.push(character);
                // Get a character from the reader.
                match reader.get_char()
                {
                    Ok(read_character) =>
                    {
                        is_valid = test(read_character);
                    }
                    Err(error) =>
                    {
                        warn!("{}", error);
                        is_valid = false;
                    }
                }
            }
            Err(error) =>
            {
                is_valid = false;
                error!("{}", error);
            }
        }
    }
    // Return the buffer of consumed characters.
    buffer
}
impl Lexer
{
    /// Scan the data held by the Reader. This will consume the Reader and
    /// clear it.
    ///
    /// Classifies the input into Whitespace, Numeric, Keyword,
    /// Identifier, or Unknown (symbol) tokens until the end of the
    /// Reader's buffer is reached, then clears the buffer.
    ///
    /// NOTE(review): the `println!` loop before the return dumps every
    /// token to stdout and looks like leftover debug output — consider
    /// removing it or demoting it to a debug-level log.
    pub fn scan(reader: &mut Reader) -> Result<Vec<Token>, &'static str>
    {
        let mut test_char: char;
        let mut token: Token;
        let mut tokens: Vec<Token>;
        // Create a new list to hold all the discovered tokens.
        tokens = Vec::new();
        // Begin turning the input into tokens.
        while reader.is_eob() == false
        {
            // Peek at the next character to pick the right consumer.
            test_char = try!(reader.get_char());
            if is_whitespace(test_char)
            {
                // Just skip/remove any whitespace.
                token = Token::from(consume_whitespace(reader));
                token.set_type(TokenTypes::Whitespace);
                // Push the token into the list.
                tokens.push(token);
            }
            else if is_numeric(test_char)
            {
                // The next character is a numeric,
                // so we are probably looking at a numeral.
                token = Token::from(consume_numeral(reader));
                token.set_type(TokenTypes::Numeric);
                // Push the token into the list.
                tokens.push(token);
            }
            else if is_alpha(test_char)
            {
                // The next character is an alpha character,
                // so we are probably looking at an identifier.
                token = Token::from(consume_identifier(reader));
                // Determine if this identifier is a keyword.
                if is_keyword(token.to_string()) == true
                {
                    token.set_type(TokenTypes::Keyword);
                }
                else
                {
                    token.set_type(TokenTypes::Identifier);
                }
                // Push the token into the list.
                tokens.push(token);
            }
            else
            {
                // This must be some kind of symbol.
                token = Token::from(consume_symbol(reader));
                token.set_type(TokenTypes::Unknown);
                // Push the token into the list.
                tokens.push(token);
            }
        }
        // Clear the reader since we are finished with it.
        reader.clear();
        // Debug dump of every scanned token (see NOTE above).
        for token in tokens.iter()
        {
            println!("{}: {}", token.get_type(), token);
        }
        // Return the scanned tokens.
        Ok(tokens)
    }
}

9
src/lexer/mod.rs Normal file
View File

@ -0,0 +1,9 @@
mod lexer;
mod token;
mod token_types;
pub use self::lexer::Lexer;
pub use self::token::Token;
pub use self::token_types::TokenTypes;

103
src/lexer/token.rs Normal file
View File

@ -0,0 +1,103 @@
use std::convert::From;
use std::str::FromStr;
use ::lexer::token_types::TokenTypes;
/// A single lexical token: a piece of source text plus its
/// classification.
pub struct Token
{
    /// The raw text this token was built from.
    string: String,
    /// The classification of this token (see `TokenTypes`).
    variant: TokenTypes
}
impl Token
{
    /// Create a new, empty Token (empty text, type Unknown).
    pub fn new() -> Token
    {
        Token::from("")
    }
    /// Get this Token's type.
    pub fn get_type(&self) -> TokenTypes
    {
        self.variant
    }
    /// Set this Token's type.
    pub fn set_type(&mut self, token_type: TokenTypes)
    {
        self.variant = token_type;
    }
}
impl ::std::str::FromStr for Token
{
    type Err = String;
    /// Build a Token from a string slice.
    ///
    /// The token starts out classified as Unknown; this conversion
    /// currently never fails.
    fn from_str(s: &str) -> Result<Token, String>
    {
        Ok(Token
        {
            string: String::from(s),
            variant: TokenTypes::Unknown
        })
    }
}
impl<'a> ::std::convert::From<&'a str> for Token
{
fn from(s: &'a str) -> Self
{
// Just call the FromStr and handle the error.
match Token::from_str(s)
{
Ok(token) =>
{
token
}
Err(error) =>
{
// Just warn and create a blank unknown Token.
warn!("{}", error);
Token
{
string: String::new(),
variant: TokenTypes::Unknown
}
}
}
}
}
impl From<String> for Token
{
    /// Build a Token from an owned String.
    fn from(s: String) -> Self
    {
        // Just call the From<&str>.
        Token::from(s.as_str())
    }
}
impl ::std::fmt::Display for Token
{
    /// Display a Token as its raw source text (the type is omitted).
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result
    {
        write!(f, "{}", self.string)
    }
}

104
src/lexer/token_types.rs Normal file
View File

@ -0,0 +1,104 @@
/// The different types of Tokens.
#[derive(Clone, Copy, PartialEq, PartialOrd, Eq, Ord)]
pub enum TokenTypes
{
    /// A Comment token.
    Comment,
    /// An Identifier that has been confirmed
    /// to be a Keyword.
    Keyword,
    /// An alpha character followed by
    /// zero or more of alpha characters, numeric characters,
    /// or underscores. After the first character these can be
    /// mixed in any order.
    Identifier,
    /// A numeric character followed by
    /// zero or more numeric characters, zero or one period,
    /// and zero or one appendable type identifiers.
    Numeric,
    /// A set of characters within double quote marks.
    /// These follow Rust's standard of escape characters.
    StaticString,
    /// This token is just whitespace,
    /// one or more space characters.
    Whitespace,
    /// This token has something, but we don't know what.
    /// This should not occur.
    Unknown
}
impl TokenTypes
{
    /// Get a str representation of this variant.
    pub fn to_str(&self) -> &'static str
    {
        match *self
        {
            TokenTypes::Comment => "Comment",
            TokenTypes::Keyword => "Keyword",
            TokenTypes::Identifier => "Identifier",
            TokenTypes::Numeric => "Numeric",
            // BUG FIX: this arm previously returned "Unknown"
            // (copy/paste error), making StaticString tokens
            // indistinguishable from Unknown in logs and output.
            TokenTypes::StaticString => "StaticString",
            TokenTypes::Whitespace => "Whitespace",
            TokenTypes::Unknown => "Unknown"
        }
    }
    /// Get a String representation of this variant.
    pub fn to_string(&self) -> String
    {
        String::from(self.to_str())
    }
}
impl ::std::fmt::Debug for TokenTypes
{
    /// Debug-format a variant as its plain name.
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result
    {
        write!(f, "{}", self.to_str())
    }
}
impl ::std::fmt::Display for TokenTypes
{
    /// Display a variant as its plain name.
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result
    {
        write!(f, "{}", self.to_str())
    }
}

View File

@ -7,6 +7,7 @@ mod compiler;
mod lexer;
mod parser;
mod reader;
mod util;

View File

@ -1,11 +1,90 @@
///
pub struct Parser
{
///
position: usize,
use std::path::PathBuf;
use ::compiler::read_file;
use ::reader::Reader;
use ::util::Util;
///
input: String
pub enum Parser
{
}
/// Process one source line into the output list.
///
/// Strips the `{@` / `@}` code-block brackets, then either expands an
/// include statement (pushing the compiled contents of the included
/// file, resolved through `util`'s include directories) or passes the
/// line through unchanged. Unresolvable include files are logged.
///
/// NOTE(review): any line merely containing the word "include" takes
/// the include path; and when the quotes are missing, `start`/`end`
/// fall back to 0, producing an empty or partial file name — confirm
/// malformed include lines are acceptable here.
fn process_line(util: &Util, output: &mut Vec<String>, line: String)
{
    let start: usize;
    let end: usize;
    let mut include_file: PathBuf;
    let mut processed_line: String;
    // Copy the given line.
    // NOTE(review): `line` is owned, so this clone is redundant.
    processed_line = line.clone();
    // Remove any code block brackets.
    processed_line = processed_line.replace("{@", "");
    processed_line = processed_line.replace("@}", "");
    // Just handling the include code right now.
    if processed_line.contains("include") == true
    {
        // Parse one or more include statements on a line.
        // Cut out the include statement so it can be processed.
        match processed_line.find("\"")
        {
            Some(location) =>
            {
                // Add one to move past the quote mark.
                start = location + 1usize;
            }
            None =>
            {
                start = 0usize;
            }
        }
        match processed_line[start..].find("\"")
        {
            Some(location) =>
            {
                end = start + location;
            }
            None =>
            {
                end = start + 0usize;
            }
        }
        include_file = PathBuf::from(&processed_line[start..end]);
        // Try to find the include file in one of the
        // include directories.
        match util.get_include_file_path(&include_file)
        {
            Some(file) =>
            {
                // Process the include file found and add it to
                // the output list.
                output.push(read_file(util, &file));
            }
            None =>
            {
                error!("Unable to find desired include file: {:?}",
                       include_file);
            }
        }
    }
    else
    {
        output.push(processed_line);
    }
}
@ -13,15 +92,31 @@ pub struct Parser
impl Parser
{
///
pub fn parse(&mut self)
pub fn parse(util: &Util, reader: &mut Reader) -> Vec<String>
{
let mut output: Vec<String>;
// Create a new set of lines to hold the parsed data.
output = Vec::new();
// Start parsing all the given lines.
while reader.is_eob() == false
{
match reader.consume_line()
{
Ok(line) =>
{
process_line(util, &mut output, line)
}
/// Returns true if the next set of characters starts with
/// the given pattern; Otherwise, false.
pub fn starts_with<P>(&self, pattern: P) -> bool
where P: AsRef<str>
Err(error) =>
{
self.position >= self.input.len()
error!("{}", error);
}
}
}
// Return the parsed input.
output
}
}

View File

@ -129,12 +129,15 @@ impl Reader
/// Get the current character.
pub fn get_char(&self) -> Result<char, &'static str>
{
if self.is_eof() == false
// Make sure we are not at the end of the buffer.
if self.is_eob() == false
{
// Return the character at the current position.
Ok(self.buffer[self.position])
}
else
{
// There was an error.
Err("Unable to read character. \
No characters left in the input buffer.")
}
@ -146,15 +149,76 @@ impl Reader
{
let character: char;
// Get the current character and move the position
// in the buffer forward.
character = try!(self.get_char());
self.position += 1;
// Return the character that was retrieved.
Ok(character)
}
///
fn is_eof(&self) -> bool
/// Get a line of text from the current character to the next
/// newline character, including the current character.
///
/// Non-consuming: the Reader's position is not advanced. The trailing
/// newline is included when one exists before the end of the buffer.
/// Always returns Ok — the Result wrapper matches the other getters.
///
/// NOTE(review): the receiver is never mutated; `&self` would suffice.
pub fn get_line(&mut self) -> Result<String, &'static str>
{
    let mut index: usize;
    let mut buffer: String;
    // Create a new buffer to hold the line created.
    buffer = String::new();
    // Now get the current position the reader is at.
    index = self.position;
    while index < self.buffer.len() && self.buffer[index] != '\n'
    {
        buffer.push(self.buffer[index]);
        index += 1;
    }
    // Read the newline character if the reader did not run out of buffer.
    if index < self.buffer.len() && self.buffer[index] == '\n'
    {
        buffer.push(self.buffer[index]);
    }
    // Return the buffer of characters that was created.
    Ok(buffer)
}
/// Get a line of text from the current character to the next
/// newline character, including the current character and newline.
/// This will consume all the characters and move the position
/// of the reader.
///
/// NOTE(review): if the buffer ends before a newline, `consume_char`'s
/// error is propagated by `try!` and the partially-consumed characters
/// in `buffer` are lost even though the position already advanced.
pub fn consume_line(&mut self) -> Result<String, &'static str>
{
    let mut continue_consuming: bool;
    let mut current_char: char;
    let mut buffer: String;
    // Create a new buffer to hold the line created.
    buffer = String::new();
    // Consume all the characters of the line
    // and add them to the buffer.
    continue_consuming = true;
    while continue_consuming == true
    {
        current_char = try!(self.consume_char());
        buffer.push(current_char);
        if current_char == '\n'
        {
            continue_consuming = false;
        }
    }
    // Return the buffer of characters that was created.
    Ok(buffer)
}
/// Check to see if we have reached the end of the buffer.
///
/// True once the position has moved past the last buffered character;
/// `get_char` fails in that state.
pub fn is_eob(&self) -> bool
{
    self.position >= self.buffer.len()
}

49
src/util.rs Normal file
View File

@ -0,0 +1,49 @@
use std::path::{Path, PathBuf};
/// Shared compiler utilities: tracks the registered include
/// directories and resolves include files against them.
pub struct Util
{
    /// Directories searched, in registration order, when resolving an
    /// include file.
    include_directories: Vec<PathBuf>
}
impl Util
{
    /// Create a new Util with no include directories registered.
    pub fn new() -> Util
    {
        Util
        {
            include_directories: Vec::new()
        }
    }
    /// Register a directory to search when resolving include files.
    pub fn register_include_dir(&mut self, dir: &Path)
    {
        self.include_directories.push(dir.to_path_buf());
    }
    /// Find the first registered include directory that contains
    /// `filepath` and return the joined path, or None when the file
    /// exists under none of them.
    pub fn get_include_file_path(&self, filepath: &Path) -> Option<PathBuf>
    {
        // Directories are searched in registration order; the first
        // existing candidate wins (replaces the original manual loop
        // with its `== true` comparison).
        self.include_directories
            .iter()
            .map(|dir| dir.join(filepath))
            .find(|path| path.exists())
    }
}