schala/src/tokenizer.rs

118 lines
2.8 KiB
Rust
Raw Normal View History

2015-07-22 03:02:55 -07:00
#[derive(Debug, Clone)]
pub enum Token {
EOF,
Separator,
LParen,
RParen,
Comma,
2015-07-26 01:51:15 -07:00
Period,
2015-07-22 03:02:55 -07:00
NumLiteral(f64),
StrLiteral(String),
2015-07-22 04:01:56 -07:00
Identifier(String),
Keyword(Kw)
}
/// Reserved words of the language.
///
/// Each variant corresponds to one exact spelling recognised by
/// `handle_identifier`; the spelling is given on the variant.
#[derive(Debug, Clone, PartialEq)]
pub enum Kw {
    /// `if`
    If,
    /// `then`
    Then,
    /// `else`
    Else,
    /// `while`
    While,
    /// `end`
    End,
    /// `let`
    Let,
    /// `fn`
    Fn,
    /// `null`
    Null,
    /// `=` (lexed as a keyword rather than as a separate operator token)
    Assign,
}
2015-07-22 03:12:01 -07:00
pub fn tokenize(input: &str) -> Vec<Token> {
let mut tokens = Vec::new();
let mut iterator = input.chars().peekable();
fn ends_identifier(c: char) -> bool {
match c {
c if char::is_whitespace(c) => true,
',' => true,
';' => true,
'(' => true,
')' => true,
2015-07-26 01:51:15 -07:00
'.' => true,
2015-07-22 03:12:01 -07:00
_ => false
}
}
while let Some(c) = iterator.next() {
if char::is_whitespace(c) && c != '\n' {
2015-07-22 03:12:01 -07:00
continue;
} else if c == '"' {
let mut buffer = String::with_capacity(20);
loop {
match iterator.next() {
Some(x) if x == '"' => break,
Some(x) => buffer.push(x),
None => return tokens,
2015-07-22 03:12:01 -07:00
}
}
tokens.push(Token::StrLiteral(buffer));
} else if c == '#' {
while let Some(x) = iterator.next() {
if x == '\n' {
break;
}
}
} else if c == ';' || c == '\n' {
if let Some(&Token::Separator) = tokens.last() {
//skip past multiple separators
} else {
tokens.push(Token::Separator);
}
2015-07-22 03:12:01 -07:00
} else if c == '(' {
tokens.push(Token::LParen);
} else if c == ')' {
tokens.push(Token::RParen);
} else if c == ',' {
tokens.push(Token::Comma);
2015-07-26 01:51:15 -07:00
} else if c == '.' {
tokens.push(Token::Period);
2015-07-22 03:12:01 -07:00
} else {
let mut buffer = String::with_capacity(20);
buffer.push(c);
while let Some(x) = iterator.peek().cloned() {
if ends_identifier(x) {
break;
}
buffer.push(iterator.next().unwrap());
}
match buffer.parse::<f64>() {
Ok(f) => tokens.push(Token::NumLiteral(f)),
2015-07-22 04:01:56 -07:00
_ => tokens.push(handle_identifier(buffer))
2015-07-22 03:12:01 -07:00
}
}
}
tokens.push(Token::EOF);
tokens
}
2015-07-22 04:01:56 -07:00
fn handle_identifier(identifier: String) -> Token {
2015-07-25 13:55:18 -07:00
let keyword = match &identifier[..] {
"let" => Kw::Let,
"if" => Kw::If,
"then" => Kw::Then,
"else" => Kw::Else,
"while" => Kw::While,
"end" => Kw::End,
"fn" => Kw::Fn,
"null" => Kw::Null,
2015-08-08 00:27:40 -07:00
"=" => Kw::Assign,
2015-07-25 13:55:18 -07:00
_ => return Token::Identifier(identifier)
};
return Token::Keyword(keyword);
2015-07-22 04:01:56 -07:00
}