use crate::sql::symbol;
use std::fmt;
/// Lexer for a single SQL message.
///
/// Bundles the normalized input text, the symbols collected while scanning,
/// and the pair of cursors that bracket the word currently being read.
#[derive(Debug, Clone)]
pub struct Scanner {
/// Input text as stored by `Scanner::new`: lower-cased and trimmed.
message: String,
/// Symbols collected by `scan_tokens`, in source order.
tokens: Vec<symbol::Symbol>,
/// Left/right cursors used to slice the pending word out of `message`.
pos: Pos,
}
/// Cursor pair delimiting the part of the message that has been read but not
/// yet turned into a symbol.
#[derive(Debug, Clone)]
struct Pos {
/// Lower bound used when slicing the pending word (or the opening quote of a
/// quoted literal) out of the message.
cursor_l: usize,
/// Upper bound used when slicing; advanced as characters are consumed.
cursor_r: usize,
}
/// Errors reported by `Scanner::scan_tokens`.
#[derive(Debug)]
pub enum LexerError {
/// An unquoted character was neither an identifier character, an operator,
/// whitespace, a delimiter, nor `*`.
NotAllowedChar,
/// The input ended while a quoted literal was still open.
QuoteError,
}
/// User-facing hint for each lexer error.
impl fmt::Display for LexerError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the message first, then emit it in one place.
        let hint = match self {
            LexerError::NotAllowedChar => "please use ascii character.",
            LexerError::QuoteError => "please check the quotes",
        };
        f.write_str(hint)
    }
}
impl Scanner {
pub fn new(message: &str) -> Scanner {
Scanner {
message: message.to_lowercase().trim().to_string(),
tokens: vec![],
pos: Pos {
cursor_l: 0,
cursor_r: 0,
},
}
}
pub fn scan_tokens(&mut self) -> Result<Vec<symbol::Symbol>, LexerError> {
debug!("Starting scanning message:\n`{}`", self.message);
let mut chars = self.message.chars();
let mut is_quoted = false;
let mut quote = '\0';
loop {
match chars.next() {
Some(x) => {
if !is_quoted && (x == '"' || x == '\'') {
quote = x.clone();
}
if x == quote || is_quoted {
self.pos.cursor_r += 1;
if !is_quoted {
is_quoted = true;
} else if x == quote {
let word = self.message.get(self.pos.cursor_l + 1..self.pos.cursor_r - 1).unwrap();
self.tokens
.push(symbol::sym(word, symbol::Token::Identifier, symbol::Group::Identifier));
is_quoted = false;
self.pos.cursor_l = self.pos.cursor_r;
quote = '\0';
}
} else if is_identifier_char(x) || is_operator(x) {
self.pos.cursor_r += 1;
} else {
match x {
' ' | '\t' | '\r' | '\n' | '(' | ')' | ',' | ';' => {
if self.pos.cursor_l != self.pos.cursor_r {
let word = self.message.get(self.pos.cursor_l..self.pos.cursor_r).unwrap();
debug!("encounter `{}`, last word is `{}`", x, word);
let mut is_multi_keyword = false;
if !is_delimiter(x) {
match symbol::check_multi_keywords_front(word) {
Some(parts) => {
debug!("The word `{}` might be a multikeyword", word);
for keyword_total_parts in parts {
debug!("Assume this keyword has {} parts", keyword_total_parts);
let mut test_chars = chars.as_str().chars();
let mut test_str = String::from(format!("{} ", word));
let mut is_last_letter = false;
let mut step_counter = 0;
let mut following_parts = 0;
loop {
match test_chars.next() {
Some(y) => {
if y.is_ascii_alphabetic() {
if !is_last_letter {
is_last_letter = true;
}
test_str.push(y);
} else {
match y {
' ' | '\t' | '\r' | '\n' => {
if is_last_letter {
following_parts += 1;
if following_parts
== keyword_total_parts - 1
{
break;
}
test_str.push(' ');
is_last_letter = false
}
}
_ => break,
}
}
}
None => break,
}
step_counter += 1;
}
debug!("Checking `{}` ...", test_str);
match symbol::SYMBOLS.get(test_str.as_str()) {
Some(token) => {
debug!("Found keyword `{}`", test_str);
self.tokens.push(token.clone());
self.pos.cursor_r += step_counter;
for _ in 0..step_counter {
chars.next();
}
is_multi_keyword = true;
break;
}
None => debug!("`{}` not a keyword", test_str),
}
}
}
None => {}
}
}
if !is_multi_keyword {
match symbol::SYMBOLS.get(word) {
Some(token) => {
self.tokens.push(token.clone());
}
None => {
self.tokens.push(symbol::sym(
word,
symbol::Token::Identifier,
symbol::Group::Identifier,
));
}
}
}
}
if is_delimiter(x) {
debug!("take `{}`", x);
self.tokens.push(symbol::Symbol::match_delimiter(x).unwrap());
}
self.pos.cursor_r += 1;
self.pos.cursor_l = self.pos.cursor_r;
}
'*' => {
self.tokens.push(symbol::sym(
"*",
symbol::Token::Identifier,
symbol::Group::Identifier,
));
self.pos.cursor_r += 1;
self.pos.cursor_l = self.pos.cursor_r;
}
_ => {
return Err(LexerError::NotAllowedChar);
}
}
}
}
None => {
if is_quoted {
return Err(LexerError::QuoteError);
}
break;
}
};
}
Ok(self.tokens.clone())
}
}
/// Returns `true` for characters that may appear inside a word: ASCII letters,
/// ASCII digits, `.`, and the two quote characters.
fn is_identifier_char(ch: char) -> bool {
    match ch {
        '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
        '\'' | '"' | '.' => true,
        _ => false,
    }
}
/// Returns `true` for the operator characters `>`, `=`, `<`, `-` and `+`.
fn is_operator(ch: char) -> bool {
    const OPERATORS: [char; 5] = ['>', '=', '<', '-', '+'];
    OPERATORS.contains(&ch)
}
/// Returns `true` for the punctuation that is emitted as its own symbol:
/// `(`, `)`, `,` and `;`.
fn is_delimiter(ch: char) -> bool {
    match ch {
        '(' | ')' | ',' | ';' => true,
        _ => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use env_logger;

    /// Scans `message` and renders every symbol as `"name", Token, Group`,
    /// so a whole token stream can be compared in a single assertion.
    fn scan(message: &str) -> Vec<String> {
        Scanner::new(message)
            .scan_tokens()
            .unwrap()
            .iter()
            .map(|x| format!("{:?}, {:?}, {:?}", x.name, x.token, x.group))
            .collect()
    }

    #[test]
    pub fn test_quote() {
        // Characters that are illegal outside quotes are fine inside them.
        assert_eq!(scan("'123://'"), vec!["\"123://\", Identifier, Identifier"]);

        // The other quote character is ordinary text inside a literal.
        let tokens = scan("'qqq\"' ,123");
        assert_eq!(tokens[0], "\"qqq\\\"\", Identifier, Identifier");

        // Mismatched quotes never close the literal, so scanning must fail
        // (a successful scan is a test failure, not a silent pass).
        let err = Scanner::new("\"qqq\', 123 ").scan_tokens().unwrap_err();
        assert_eq!(err.to_string(), "please check the quotes");
    }

    #[test]
    pub fn test_scan_tokens() {
        assert_eq!(
            scan("select customername, contactname, address from customers where address is null;"),
            vec![
                "\"select\", Select, Keyword",
                "\"customername\", Identifier, Identifier",
                "\",\", Comma, Delimiter",
                "\"contactname\", Identifier, Identifier",
                "\",\", Comma, Delimiter",
                "\"address\", Identifier, Identifier",
                "\"from\", From, Keyword",
                "\"customers\", Identifier, Identifier",
                "\"where\", Where, Keyword",
                "\"address\", Identifier, Identifier",
                "\"is null\", IsNull, Keyword",
                "\";\", Semicolon, Delimiter",
            ]
        );

        assert_eq!(
            scan("select * from customers;"),
            vec![
                "\"select\", Select, Keyword",
                "\"*\", Identifier, Identifier",
                "\"from\", From, Keyword",
                "\"customers\", Identifier, Identifier",
                "\";\", Semicolon, Delimiter",
            ]
        );

        // Mixed whitespace between the parts of a multi-word keyword.
        assert_eq!(
            scan("insert \n\r\tinto \t\tcustomers \n(customername,\n\n city)\n\n values ('cardinal', 'norway');"),
            vec![
                "\"insert into\", InsertInto, Keyword",
                "\"customers\", Identifier, Identifier",
                "\"(\", ParentLeft, Delimiter",
                "\"customername\", Identifier, Identifier",
                "\",\", Comma, Delimiter",
                "\"city\", Identifier, Identifier",
                "\")\", ParentRight, Delimiter",
                "\"values\", Values, Keyword",
                "\"(\", ParentLeft, Delimiter",
                "\"cardinal\", Identifier, Identifier",
                "\",\", Comma, Delimiter",
                "\"norway\", Identifier, Identifier",
                "\")\", ParentRight, Delimiter",
                "\";\", Semicolon, Delimiter",
            ]
        );

        assert_eq!(
            scan("create table x1;"),
            vec![
                "\"create table\", CreateTable, Keyword",
                "\"x1\", Identifier, Identifier",
                "\";\", Semicolon, Delimiter",
            ]
        );
    }

    #[test]
    fn test_scan_tokens_error() {
        // `$` is outside the accepted alphabet, so scanning must fail.
        let err = Scanner::new("create table $1234").scan_tokens().unwrap_err();
        assert_eq!(err.to_string(), "please use ascii character.");
    }
}