diff --git a/examples/compile.rs b/examples/compile.rs index be659a4..d9f6025 100644 --- a/examples/compile.rs +++ b/examples/compile.rs @@ -25,9 +25,20 @@ pub const TEST_OUTPUT_FILENAME: &'static str = "test.rs"; /// pub fn main() { + let mut compiler: Compiler; + let mut include: PathBuf; let mut input: PathBuf; let mut output: PathBuf; + // Create a new compiler. + compiler = Compiler::new(); + + // Add the resource directory as an include directory + // for the compiler. + include = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + include.push(RESOURCE_DIR); + compiler.register_include_dir(&include); + // Get the input file to test with. It is in the // resources directory. input = PathBuf::from(env!("CARGO_MANIFEST_DIR")); @@ -41,5 +52,5 @@ pub fn main() output.push(TEST_OUTPUT_FILENAME); println!("Compiling {:?} to {:?}", input, output); - Compiler::compile(input, output); + compiler.compile(input, output); } diff --git a/src/compiler.rs b/src/compiler.rs index d8a6447..d8bf99a 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -2,8 +2,11 @@ use std::fs::File; use std::io::Write; use std::path::Path; -use super::lexer::Lexer; -use super::reader::Reader; +use ::lexer::Lexer; +use ::parser::Parser; +use ::reader::Reader; +use ::util::Util; + const FILE_HEADER: &'static str = @@ -14,41 +17,39 @@ const FILE_HEADER: &'static str = /// -pub enum Compiler +pub struct Compiler { + util: Util } /// Reads an input File and parses it. -fn read_file(input_path: &Path) -> String +pub fn read_file(util: &Util, input_path: &Path) -> String { let mut output: String; let mut reader: Reader; + let mut lines: Vec; - output = String::from(FILE_HEADER); + // Create the output string of the compiled file. + output = String::new(); // Create a Reader from the given input file. reader = Reader::from_file(input_path); - // Use the Lexer to scan the Reader's - // buffer into tokens. - match Lexer::scan(reader) + Lexer::scan(&mut reader); + + /* + // Parse the file and turn it into a set of compiled lines. + lines = Parser::parse(&util, &mut reader); + + // Add all these lines to the final output. + for line in lines.into_iter() { - Ok(tokens) => - { - for token in tokens - { - output.push_str(&token); - } - } - - Err(error) => - { - error!("{}", error); - } + output.push_str(&line); } - +*/ + // Return the final output. output } @@ -90,14 +91,42 @@ fn write_file(output_path: &Path, output: String) impl Compiler { - /// Compile - pub fn compile(input: F, output: F) + /// Create a new Compiler to use. + pub fn new() -> Compiler + { + Compiler + { + util: Util::new() + } + } + + /// Register an include directory. + pub fn register_include_dir(&mut self, include_dir: &Path) + { + self.util.register_include_dir(include_dir); + } + + /// Register a list of include directories. + pub fn register_include_dirs(&mut self, include_dirs: Vec<&Path>) + { + for dir in include_dirs.iter() + { + self.register_include_dir(dir); + } + } + + /// Compile a given input file. + pub fn compile(&self, input: F, output: F) where F: AsRef { - let output_string: String; + let mut output_string: String; + + // Create the output string of the compiled file. + // Start it with the compilation header. + output_string = String::from(FILE_HEADER); // Turn the input file into a compiled String. - output_string = read_file(input.as_ref()); + output_string.push_str(&read_file(&self.util, input.as_ref())); // Write the compiled output file. write_file(output.as_ref(), output_string); diff --git a/src/lexer.rs b/src/lexer.rs deleted file mode 100644 index b8f3a51..0000000 --- a/src/lexer.rs +++ /dev/null @@ -1,66 +0,0 @@ -use super::reader::Reader; - - - -/// -pub enum Lexer -{ -} - - -/// Determines if a character is alphabetic. -fn is_alpha(c: char) -> bool -{ - c.is_alphabetic() -} - -/// Determines if a character is a numeral -fn is_numeric(c: char) -> bool -{ - c.is_numeric() -} - -/// Determines if a character is whitespace. -fn is_whitespace(c: char) -> bool -{ - c.is_whitespace() -} - -/// Returns true if the next set of characters starts with -/// the given pattern; Otherwise, false. -fn starts_with

(pattern: P) -> bool - where P: AsRef -{ - false -} - -/// Consume and discard zero or more whitespace characters. -fn consume_whitespace() -{ - consume_while(is_whitespace); -} - -/// Consume characters until the test returns false. -fn consume_while(test: F) -> String - where F: Fn(char) -> bool -{ - String::new() -} - - - -impl Lexer -{ - /// Scan the data held by the Reader. This will consume the Reader and - /// clear it. - pub fn scan(reader: Reader) -> Result, String> - { - let tokens: Vec; - - // Create a new list to hold all the discovered tokens. - tokens = Vec::new(); - - // Return the scanned tokens. - Ok(tokens) - } -} diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs new file mode 100644 index 0000000..b9a8be1 --- /dev/null +++ b/src/lexer/lexer.rs @@ -0,0 +1,377 @@ +use ::reader::Reader; +use ::lexer::token::Token; +use ::lexer::token_types::TokenTypes; + + + +/// The keywords for this language. +const keywords: [&'static str; 1] = ["include"]; + + + +/// Performs lexigraphical analysis on +/// the text given from a Reader. +pub enum Lexer +{ +} + + + +/// Determines if a character is alphabetic. +fn is_alpha(c: char) -> bool +{ + c.is_alphabetic() +} + +/// Determines if a character is a numeral +fn is_numeric(c: char) -> bool +{ + c.is_numeric() +} + +/// Determines if a character is a numeral +fn is_alphanumeric(c: char) -> bool +{ + is_alpha(c) | is_numeric(c) +} + +/// Determines if a character is whitespace. +fn is_whitespace(c: char) -> bool +{ + c.is_whitespace() +} + +fn is_non_whitespace(c: char) -> bool +{ + !is_whitespace(c) +} + +/// Determines if a character is a symbol. +fn is_symbol(c: char) -> bool +{ + is_whitespace(c) == false && is_alphanumeric(c) == false +} + +/// Determines if a String is a keyword. +fn is_keyword(s: String) -> bool +{ + for keyword in keywords.iter() + { + if &s == keyword + { + return true + } + } + + false +} + +/// Returns true if the next set of characters starts with +/// the given pattern; Otherwise, false. +fn starts_with

(pattern: P) -> bool + where P: AsRef +{ + false +} + +/// Consume and discard zero or more whitespace characters. +fn consume_whitespace(reader: &mut Reader) -> String +{ + consume_while(reader, is_whitespace) +} + +/// +fn consume_symbol(reader: &mut Reader) -> String +{ + let mut is_valid: bool; + let mut buffer: String; + + // Create a buffer to store all the consumed characters. + buffer = String::new(); + + // Check if the first character from the Reader + // is a symbol character. + match reader.get_char() + { + Ok(read_character) => + { + is_valid = is_symbol(read_character); + } + + Err(error) => + { + warn!("{}", error); + is_valid = false; + } + } + + if is_valid == true + { + // Consume this character that passed. + match reader.consume_char() + { + Ok(character) => + { + // Add this consumed character to the buffer. + buffer.push(character); + } + + Err(error) => + { + error!("{}", error); + } + } + } + + // Return the buffer of consumed characters. + buffer +} + +/// +fn consume_numeral(reader: &mut Reader) -> String +{ + let mut is_valid: bool; + let mut buffer: String; + + // Create a buffer to store all the consumed characters. + buffer = String::new(); + + // Check if the first character from the Reader + // is a numeric character. + match reader.get_char() + { + Ok(read_character) => + { + is_valid = is_numeric(read_character); + } + + Err(error) => + { + warn!("{}", error); + is_valid = false; + } + } + + // If the first character is valid, then consume it and + // consume any numeric characters that follow. + if is_valid == true + { + // Consume this character that passed. + match reader.consume_char() + { + Ok(character) => + { + // Add this consumed character to the buffer. + buffer.push(character); + } + + Err(error) => + { + error!("{}", error); + } + } + + // Now read any numeric characters that follow. + buffer.push_str(&consume_while(reader, is_non_whitespace)); + } + + // Return the buffer of consumed characters. + buffer +} + +/// +fn consume_identifier(reader: &mut Reader) -> String +{ + let mut is_valid: bool; + let mut buffer: String; + + // Create a buffer to store all the consumed characters. + buffer = String::new(); + + // Check if the first character from the Reader + // is an alpha character. + match reader.get_char() + { + Ok(read_character) => + { + is_valid = is_alpha(read_character); + } + + Err(error) => + { + warn!("{}", error); + is_valid = false; + } + } + + // If the first character is valid, then consume it and + // consume any alphanumeric character the follow. + if is_valid == true + { + // Consume this character that passed. + match reader.consume_char() + { + Ok(character) => + { + // Add this consumed character to the buffer. + buffer.push(character); + } + + Err(error) => + { + error!("{}", error); + } + } + + // Now read any alphanumeric characters that follow. + buffer.push_str(&consume_while(reader, is_alphanumeric)); + } + + // Return the buffer of consumed characters. + buffer +} + +/// Consume characters until the test returns false. +fn consume_while(reader: &mut Reader, test: F) -> String + where F: Fn(char) -> bool +{ + let mut is_valid: bool; + let mut buffer: String; + + // Create a buffer to store all the consumed characters. + buffer = String::new(); + + // Get a character from the reader. + match reader.get_char() + { + Ok(read_character) => + { + is_valid = test(read_character); + } + + Err(error) => + { + warn!("{}", error); + is_valid = false; + } + } + + // Loop until we get to invalid input. + while is_valid == true + { + // Consume this character that passed. + match reader.consume_char() + { + Ok(character) => + { + // Add this consumed character to the buffer. + buffer.push(character); + + // Get a character from the reader. + match reader.get_char() + { + Ok(read_character) => + { + is_valid = test(read_character); + } + + Err(error) => + { + warn!("{}", error); + is_valid = false; + } + } + } + + Err(error) => + { + is_valid = false; + error!("{}", error); + } + } + } + + // Return the buffer of consumed characters. + buffer +} + + + +impl Lexer +{ + /// Scan the data held by the Reader. This will consume the Reader and + /// clear it. + pub fn scan(reader: &mut Reader) -> Result, &'static str> + { + let mut test_char: char; + let mut token: Token; + let mut tokens: Vec; + + // Create a new list to hold all the discovered tokens. + tokens = Vec::new(); + + // Begin turning the input into tokens. + while reader.is_eob() == false + { + test_char = try!(reader.get_char()); + if is_whitespace(test_char) + { + // Just skip/remove any whitespace. + token = Token::from(consume_whitespace(reader)); + token.set_type(TokenTypes::Whitespace); + + // Push the token into the list. + tokens.push(token); + } + else if is_numeric(test_char) + { + // The next character is a numeric, + // so we are probably looking at a numeral. + token = Token::from(consume_numeral(reader)); + token.set_type(TokenTypes::Numeric); + + // Push the token into the list. + tokens.push(token); + } + else if is_alpha(test_char) + { + // The next character is an alpha character, + // so we are probably looking at an identifier. + token = Token::from(consume_identifier(reader)); + + // Determine if this identifier is a keyword. + if is_keyword(token.to_string()) == true + { + token.set_type(TokenTypes::Keyword); + } + else + { + token.set_type(TokenTypes::Identifier); + } + + // Push the token into the list. + tokens.push(token); + } + else + { + // This must be some kind of symbol. + token = Token::from(consume_symbol(reader)); + token.set_type(TokenTypes::Unknown); + + // Push the token into the list. + tokens.push(token); + } + } + + // Clear the reader since we are finished with it. + reader.clear(); + +for token in tokens.iter() +{ + println!("{}: {}", token.get_type(), token); +} + + // Return the scanned tokens. + Ok(tokens) + } +} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..c3e47fd --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1,9 @@ +mod lexer; +mod token; +mod token_types; + + + +pub use self::lexer::Lexer; +pub use self::token::Token; +pub use self::token_types::TokenTypes; diff --git a/src/lexer/token.rs b/src/lexer/token.rs new file mode 100644 index 0000000..c288910 --- /dev/null +++ b/src/lexer/token.rs @@ -0,0 +1,103 @@ +use std::convert::From; +use std::str::FromStr; + +use ::lexer::token_types::TokenTypes; + + + +/// +pub struct Token +{ + /// + string: String, + + /// + variant: TokenTypes +} + + + +impl Token +{ + /// + pub fn new() -> Token + { + Token::from("") + } + + /// + pub fn get_type(&self) -> TokenTypes + { + self.variant + } + + /// + pub fn set_type(&mut self, token_type: TokenTypes) + { + self.variant = token_type; + } +} + +impl ::std::str::FromStr for Token +{ + type Err = String; + + fn from_str(s: &str) -> Result + { + let token: Token; + + // Create the new Token from the given &str. + token = + Token + { + string: String::from(s), + variant: TokenTypes::Unknown + }; + + // Return the token. + Ok(token) + } +} + +impl<'a> ::std::convert::From<&'a str> for Token +{ + fn from(s: &'a str) -> Self + { + // Just call the FromStr and handle the error. + match Token::from_str(s) + { + Ok(token) => + { + token + } + + Err(error) => + { + // Just warn and create a blank unknown Token. + warn!("{}", error); + Token + { + string: String::new(), + variant: TokenTypes::Unknown + } + } + } + } +} + +impl From for Token +{ + fn from(s: String) -> Self + { + // Just call the From<&str>. + Token::from(s.as_str()) + } +} + +impl ::std::fmt::Display for Token +{ + fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result + { + write!(f, "{}", self.string) + } +} diff --git a/src/lexer/token_types.rs b/src/lexer/token_types.rs new file mode 100644 index 0000000..c5123e2 --- /dev/null +++ b/src/lexer/token_types.rs @@ -0,0 +1,104 @@ +/// The different types of Tokens. +#[derive(Clone, Copy, PartialEq, PartialOrd, Eq, Ord)] +pub enum TokenTypes +{ + /// A Comment token. + Comment, + + /// An Identifier that has been confirmed + /// to be a Keyword. + Keyword, + + /// An alpha character followed by + /// zero or more of alpha characters, numeric characters, + /// or underscores. After the first character these can be + /// mixed in any order. + Identifier, + + /// A numeric character followed by + /// zero or more numeric characters, zero or one period, + /// and zero or one appendable type identifies. + Numeric, + + /// A set of characters within double quote marks. + /// These follow Rust's standard of escape characters. + StaticString, + + /// This token is just whitespace, + /// one or more space characters. + Whitespace, + + /// This token has something, but we don't know what. + /// This should not occur. + Unknown +} + + + +impl TokenTypes +{ + /// Get a str representation of this variant. + pub fn to_str(&self) -> &'static str + { + match *self + { + TokenTypes::Comment => + { + "Comment" + } + + TokenTypes::Keyword => + { + "Keyword" + } + + TokenTypes::Identifier => + { + "Identifier" + } + + TokenTypes::Numeric => + { + "Numeric" + } + + TokenTypes::StaticString => + { + "Unknown" + } + + TokenTypes::Whitespace => + { + "Whitespace" + } + + TokenTypes::Unknown => + { + "Unknown" + } + } + } + + /// Get a String representation of this variant. + pub fn to_string(&self) -> String + { + String::from(self.to_str()) + } +} + +impl ::std::fmt::Debug for TokenTypes +{ + fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result + { + write!(f, "{}", self.to_str()) + } +} + +impl ::std::fmt::Display for TokenTypes +{ + fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result + { + write!(f, "{}", self.to_str()) + } +} + diff --git a/src/lib.rs b/src/lib.rs index 8dbe9bb..f300024 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,7 @@ mod compiler; mod lexer; mod parser; mod reader; +mod util; diff --git a/src/parser.rs b/src/parser.rs index febe672..78e6954 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,11 +1,90 @@ -/// -pub struct Parser -{ - /// - position: usize, +use std::path::PathBuf; - /// - input: String +use ::compiler::read_file; +use ::reader::Reader; +use ::util::Util; + + + +/// +pub enum Parser +{ +} + + + +/// +fn process_line(util: &Util, output: &mut Vec, line: String) +{ + let start: usize; + let end: usize; + let mut include_file: PathBuf; + let mut processed_line: String; + + // Copy the given line. + processed_line = line.clone(); + + // Remove any code block brackets. + processed_line = processed_line.replace("{@", ""); + processed_line = processed_line.replace("@}", ""); + + // Just handling the include code right now. + if processed_line.contains("include") == true + { + // Parse one or more include statements on a line. + + // Cut out the include statement so it can be processed. + match processed_line.find("\"") + { + Some(location) => + { + // Add one to move past the quote mark. + start = location + 1usize; + } + + None => + { + start = 0usize; + } + } + + match processed_line[start..].find("\"") + { + Some(location) => + { + end = start + location; + } + + None => + { + end = start + 0usize; + } + } + + include_file = PathBuf::from(&processed_line[start..end]); + + // Try to find the include file in one of the + // include directories. + match util.get_include_file_path(&include_file) + { + Some(file) => + { + // Process the include file found and add it to + // the output list. + output.push(read_file(util, &file)); + } + + None => + { + error!("Unable to find desired include file: {:?}", + include_file); + } + } + } + else + { + output.push(processed_line); + } } @@ -13,15 +92,31 @@ pub struct Parser impl Parser { /// - pub fn parse(&mut self) + pub fn parse(util: &Util, reader: &mut Reader) -> Vec { - } + let mut output: Vec; - /// Returns true if the next set of characters starts with - /// the given pattern; Otherwise, false. - pub fn starts_with

(&self, pattern: P) -> bool - where P: AsRef - { - self.position >= self.input.len() + // Create a new set of lines to hold the parsed data. + output = Vec::new(); + + // Start parsing all the given lines. + while reader.is_eob() == false + { + match reader.consume_line() + { + Ok(line) => + { + process_line(util, &mut output, line) + } + + Err(error) => + { + error!("{}", error); + } + } + } + + // Return the parsed input. + output } } diff --git a/src/reader.rs b/src/reader.rs index 504d0b0..202dc5c 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -129,12 +129,15 @@ impl Reader /// Get the current character. pub fn get_char(&self) -> Result { - if self.is_eof() == false + // Make sure we are not at the end of the buffer. + if self.is_eob() == false { + // Return the character at the current position. Ok(self.buffer[self.position]) } else { + // There was an error. Err("Unable to read character. \ No characters left in the input buffer.") } @@ -146,15 +149,76 @@ impl Reader { let character: char; + // Get the current character and move the position + // in the buffer forward. character = try!(self.get_char()); - self.position += 1; + // Return the character that was retrieved. Ok(character) } - /// - fn is_eof(&self) -> bool + /// Get a line of text from the current character to the next + /// newline character, including the current character. + pub fn get_line(&mut self) -> Result + { + let mut index: usize; + let mut buffer: String; + + // Create a new buffer to hold the line created. + buffer = String::new(); + + // Now get the current position the reader is at. + index = self.position; + while index < self.buffer.len() && self.buffer[index] != '\n' + { + buffer.push(self.buffer[index]); + index += 1; + } + + // Read the newline character if the reader did not run out of buffer. + if index < self.buffer.len() && self.buffer[index] == '\n' + { + buffer.push(self.buffer[index]); + } + + // Return the buffer of characters that was created. + Ok(buffer) + } + + /// Get a line of text from the current character to the next + /// newline character, including the current character and newline. + /// This will consume all the characters and move the position + /// of the reader. + pub fn consume_line(&mut self) -> Result + { + let mut continue_consuming: bool; + let mut current_char: char; + let mut buffer: String; + + // Create a new buffer to hold the line created. + buffer = String::new(); + + // Consume all the characters of the line + // and add them to the buffer. + continue_consuming = true; + while continue_consuming == true + { + current_char = try!(self.consume_char()); + buffer.push(current_char); + + if current_char == '\n' + { + continue_consuming = false; + } + } + + // Return the buffer of characters that was created. + Ok(buffer) + } + + /// Check to see if we have reached the end of the buffer. + pub fn is_eob(&self) -> bool { self.position >= self.buffer.len() } diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..686cc75 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,49 @@ +use std::path::{Path, PathBuf}; + + + +pub struct Util +{ + include_directories: Vec +} + + + +impl Util +{ + pub fn new() -> Util + { + Util + { + include_directories: Vec::new() + } + } + + pub fn register_include_dir(&mut self, dir: &Path) + { + let include_dir: PathBuf; + + include_dir = PathBuf::from(dir); + + self.include_directories.push(include_dir); + } + + pub fn get_include_file_path(&self, filepath: &Path) -> Option + { + let mut path: PathBuf; + + // Loop through and find the first path that exists + // when combining it with the paths in the list of include + // directories. + for dir in self.include_directories.iter() + { + path = PathBuf::from(dir).join(filepath); + if path.exists() == true + { + return Some(path); + } + } + + None + } +}