From 282926f07c4460d4d5e610f55e41b52dc26f27d3 Mon Sep 17 00:00:00 2001 From: Jason Travis Smith Date: Wed, 27 Jul 2016 01:47:00 -0400 Subject: [PATCH] The Lexer now parses StaticStrings correctly. --- src/lexer/lexer.rs | 100 ++++++++++++++++++++++++++++++++++++++- src/lexer/token_types.rs | 10 +++- 2 files changed, 107 insertions(+), 3 deletions(-) diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index b9a8be1..ce897c7 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -52,6 +52,12 @@ fn is_symbol(c: char) -> bool is_whitespace(c) == false && is_alphanumeric(c) == false } +/// Determines if a character is anything other than a double quote. +fn is_not_double_quote(c: char) -> bool +{ + c != '"' +} + /// Determines if a String is a keyword. fn is_keyword(s: String) -> bool { @@ -231,6 +237,73 @@ fn consume_identifier(reader: &mut Reader) -> String buffer } +/// Consume a double quoted static string, keeping both quote characters. +fn consume_static_string(reader: &mut Reader) -> String +{ + let mut is_valid: bool; + let mut buffer: String; + + // Create a buffer to store all the consumed characters. + buffer = String::new(); + + // Check if the first character from the Reader + // is a double quote character. + match reader.get_char() + { + Ok(read_character) => + { + is_valid = read_character == '"'; + } + + Err(error) => + { + warn!("{}", error); + is_valid = false; + } + } + + // If the first character is valid, then consume it and + // consume every character that follows, up to the closing double quote. + if is_valid == true + { + // Consume the first double quote. + match reader.consume_char() + { + Ok(character) => + { + // Add this consumed character to the buffer. + buffer.push(character); + } + + Err(error) => + { + error!("{}", error); + } + } + + // Now read until we get to the next double quote character. + buffer.push_str(&consume_while(reader, is_not_double_quote)); + + // Consume the second double quote. + match reader.consume_char() + { + Ok(character) => + { + // Add this consumed character to the buffer. 
+ buffer.push(character); + } + + Err(error) => + { + error!("{}", error); + } + } + } + + // Return the buffer of consumed characters. + buffer +} + /// Consume characters until the test returns false. fn consume_while(reader: &mut Reader, test: F) -> String where F: Fn(char) -> bool @@ -352,10 +425,33 @@ impl Lexer // Push the token into the list. tokens.push(token); } + else if is_symbol(test_char) + { + // If this is a double quote then it means we are + // looking at a string. If so, then we should consume a string. + if test_char == '"' + { + // This must be a static string. + token = Token::from(consume_static_string(reader)); + token.set_type(TokenTypes::StaticString); + + // Push the token into the list. + tokens.push(token); + } + else + { + // This must be some kind of symbol. + token = Token::from(consume_symbol(reader)); + token.set_type(TokenTypes::Symbol); + + // Push the token into the list. + tokens.push(token); + } + } else { - // This must be some kind of symbol. - token = Token::from(consume_symbol(reader)); + // This is something we don't know of. + token = Token::from(consume_while(reader, is_non_whitespace)); token.set_type(TokenTypes::Unknown); // Push the token into the list. diff --git a/src/lexer/token_types.rs b/src/lexer/token_types.rs index c5123e2..27bbbe5 100644 --- a/src/lexer/token_types.rs +++ b/src/lexer/token_types.rs @@ -24,6 +24,9 @@ pub enum TokenTypes /// These follow Rust's standard of escape characters. StaticString, + /// A symbol is any character that is neither alphanumeric nor whitespace. + Symbol, + /// This token is just whitespace, /// one or more space characters. Whitespace, @@ -64,7 +67,12 @@ impl TokenTypes TokenTypes::StaticString => { - "Unknown" + "String" + } + + TokenTypes::Symbol => + { + "Symbol" } TokenTypes::Whitespace =>