diff --git a/src/maaru_lang/mod.rs b/src/maaru_lang/mod.rs
index e016e1a..84a8506 100644
--- a/src/maaru_lang/mod.rs
+++ b/src/maaru_lang/mod.rs
@@ -1,3 +1,6 @@
+extern crate itertools;
+use self::itertools::Itertools;
+
 use language::{ProgrammingLanguage, EvaluationMachine, ParseError, TokenError, LLVMCodeString};
 
 pub struct Maaru {
@@ -14,7 +17,27 @@ pub struct MaaruEvaluator {
 }
 
 #[derive(Debug)]
-pub struct Token { }
+/// A lexical token produced by `tokenize`.
+pub enum Token {
+    StrLiteral(String),
+    Newline,
+    LParen,
+    RParen,
+    LBracket,
+    RBracket,
+    LBrace,
+    RBrace,
+    Comma,
+    Identifier(String),
+    Operator(String),
+    NumLiteral(Number),
+}
+
+/// Numeric payload carried by `Token::NumLiteral`.
+#[derive(Debug)]
+pub enum Number {
+    Integer(u64),
+}
 
 #[derive(Debug)]
 pub struct AST { }
@@ -28,8 +51,54 @@ impl ProgrammingLanguage for Maaru {
         "Maaru".to_string()
     }
 
-    fn tokenize(_input: &str) -> Result<Vec<Token>, TokenError> {
-        Ok(vec![Token { }])
+    /// Splits `input` into a flat list of `Token`s.
+    ///
+    /// A `;` starts a comment that runs through the end of the line; the
+    /// terminating `\n` is consumed with it. Whitespace other than `\n` is
+    /// skipped, `\n` becomes `Token::Newline`, and a run of alphanumeric
+    /// characters becomes a single `Token::Identifier`.
+    ///
+    /// # Panics
+    ///
+    /// Panics via `unimplemented!()` on any character not handled above.
+    fn tokenize(input: &str) -> Result<Vec<Token>, TokenError> {
+        use self::Token::*;
+        let mut tokens = Vec::new();
+        let mut iter = input.chars().peekable();
+        while let Some(c) = iter.next() {
+            if c == ';' {
+                while let Some(c) = iter.next() {
+                    if c == '\n' {
+                        break;
+                    }
+                }
+                continue;
+            }
+            let cur_tok = match c {
+                c if char::is_whitespace(c) && c != '\n' => continue,
+                '\n' => Newline,
+                '(' => LParen,
+                ')' => RParen,
+                '[' => LBracket,
+                ']' => RBracket,
+                '{' => LBrace,
+                '}' => RBrace,
+                ',' => Comma,
+                c if char::is_alphanumeric(c) => {
+                    let mut buffer = String::new();
+                    buffer.push(c);
+                    // Keep consuming while the next character still belongs to the
+                    // identifier; `peeking_take_while` leaves the first rejected
+                    // character in `iter` for the outer loop.
+                    buffer.extend(iter.peeking_take_while(|x| char::is_alphanumeric(*x)));
+                    Identifier(buffer)
+                },
+                _ => unimplemented!(),
+            };
+            tokens.push(cur_tok);
+        }
+
+        Ok(tokens)
     }
 
     fn parse(_input: Vec<Token>) -> Result<AST, ParseError> {