// schala/src/tokenizer.rs

/// A single lexical unit produced by `tokenize`.
///
/// `PartialEq` is derived (as it already is for the nested [`Kw`]) so that
/// token streams can be compared directly, e.g. with `assert_eq!` in tests.
/// `Eq` is intentionally not derived because `NumLiteral` carries an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// End-of-input marker pushed by `tokenize` once the input is exhausted.
    EOF,
    /// Statement separator, produced for `;` and for a newline.
    Separator,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// `.`
    Period,
    /// A word that parsed successfully as an `f64`.
    NumLiteral(f64),
    /// The characters between a pair of double quotes (quotes not included).
    StrLiteral(String),
    /// A word that is neither a number nor a reserved word.
    Identifier(String),
    /// A reserved word; see [`Kw`].
    Keyword(Kw),
}

/// Reserved words of the language, as recognised by `handle_identifier`.
#[derive(Debug, Clone, PartialEq)]
pub enum Kw {
    /// `if`
    If,
    /// `then`
    Then,
    /// `else`
    Else,
    /// `while`
    While,
    /// `end`
    End,
    /// `let`
    Let,
    /// `fn`
    Fn,
    /// `null`
    Null,
    /// `=`, which the lexer treats as a keyword rather than as punctuation.
    Assign,
}

/// Splits `input` into a flat list of [`Token`]s that always ends with
/// `Token::EOF`.
///
/// Lexical rules, in the order they are tried for each character:
///
/// * `;` and newline produce `Token::Separator`; a separator is never pushed
///   directly after another separator, so runs collapse into one.
/// * Any other whitespace is skipped.
/// * `"` starts a string literal that runs to the next `"`. Characters are
///   copied verbatim; there is no escape processing.
/// * `#` starts a comment that runs to the end of the line. The newline
///   itself is *not* part of the comment, so it still yields a separator.
/// * `(`, `)`, `,` and `.` produce their punctuation tokens.
/// * Anything else starts a word, which extends until whitespace or one of
///   `, ; ( ) .`. A word that `str::parse::<f64>` accepts becomes
///   `Token::NumLiteral`; every other word goes through `handle_identifier`.
///   Because `.` ends a word, `1.5` is lexed as number, period, number.
///
/// If the input ends inside a string literal, the partial literal is
/// discarded and scanning stops, but the `EOF` token is still appended.
pub fn tokenize(input: &str) -> Vec<Token> {
    /// Returns `true` for characters that terminate a bare word.
    fn ends_identifier(c: char) -> bool {
        match c {
            ',' | ';' | '(' | ')' | '.' => true,
            _ => c.is_whitespace(),
        }
    }

    /// Pushes a `Separator` unless the previous token already is one.
    fn push_separator(tokens: &mut Vec<Token>) {
        match tokens.last() {
            Some(&Token::Separator) => {}
            _ => tokens.push(Token::Separator),
        }
    }

    let mut tokens = Vec::new();
    let mut iterator = input.chars().peekable();

    'scan: while let Some(c) = iterator.next() {
        match c {
            // Must be tested before the generic whitespace arm: '\n' is
            // whitespace too, but it is significant here.
            ';' | '\n' => push_separator(&mut tokens),
            c if c.is_whitespace() => {}
            '"' => {
                let mut buffer = String::with_capacity(20);
                loop {
                    match iterator.next() {
                        Some('"') => break,
                        Some(x) => buffer.push(x),
                        // Unterminated literal: drop it, but leave through
                        // the common exit so the stream still ends in EOF.
                        None => break 'scan,
                    }
                }
                tokens.push(Token::StrLiteral(buffer));
            }
            '#' => {
                // Peek rather than consume, so the terminating newline is
                // handled by the separator arm above on the next iteration.
                while let Some(x) = iterator.peek().cloned() {
                    if x == '\n' {
                        break;
                    }
                    iterator.next();
                }
            }
            '(' => tokens.push(Token::LParen),
            ')' => tokens.push(Token::RParen),
            ',' => tokens.push(Token::Comma),
            '.' => tokens.push(Token::Period),
            _ => {
                let mut buffer = String::with_capacity(20);
                buffer.push(c);

                while let Some(x) = iterator.peek().cloned() {
                    if ends_identifier(x) {
                        break;
                    }
                    buffer.push(x);
                    iterator.next();
                }

                match buffer.parse::<f64>() {
                    Ok(f) => tokens.push(Token::NumLiteral(f)),
                    Err(_) => tokens.push(handle_identifier(buffer)),
                }
            }
        }
    }

    tokens.push(Token::EOF);
    tokens
}

/// Classifies a bare word.
///
/// The reserved words `let`, `if`, `then`, `else`, `while`, `end`, `fn`,
/// `null` and the symbol `=` map to `Token::Keyword`; every other word is
/// returned unchanged inside `Token::Identifier`.
fn handle_identifier(identifier: String) -> Token {
    let reserved = match identifier.as_str() {
        "let" => Some(Kw::Let),
        "if" => Some(Kw::If),
        "then" => Some(Kw::Then),
        "else" => Some(Kw::Else),
        "while" => Some(Kw::While),
        "end" => Some(Kw::End),
        "fn" => Some(Kw::Fn),
        "null" => Some(Kw::Null),
        "=" => Some(Kw::Assign),
        _ => None,
    };

    match reserved {
        Some(kw) => Token::Keyword(kw),
        None => Token::Identifier(identifier),
    }
}