A bunch of token stuff

This commit is contained in:
greg 2017-09-06 05:09:20 -07:00
parent 8230b115de
commit f15427e5d9
2 changed files with 76 additions and 20 deletions

View File

@ -31,6 +31,7 @@ impl ProgrammingLanguageInterface for Schala {
} }
}; };
/*
let ast = match parsing::parse(tokens) { let ast = match parsing::parse(tokens) {
Ok(ast) => { Ok(ast) => {
if options.debug_parse { if options.debug_parse {
@ -43,8 +44,9 @@ impl ProgrammingLanguageInterface for Schala {
return output; return output;
} }
}; };
*/
let evaluation_output = format!("test eval"); let evaluation_output = format!("{:?}", tokens);
output.add_output(evaluation_output); output.add_output(evaluation_output);
return output; return output;
} }

View File

@ -1,29 +1,30 @@
extern crate itertools;
use language::{TokenError, ParseError}; use language::{TokenError, ParseError};
use std::rc::Rc; use std::rc::Rc;
use std::iter::{Enumerate, Peekable};
use self::itertools::Itertools;
use std::str::Chars;
#[allow(dead_code)] #[allow(dead_code)]
#[derive(Debug)] #[derive(Debug)]
pub enum TokenType { pub enum TokenType {
Newline, Newline, Semicolon,
Semicolon,
LParen, LParen, RParen,
RParen, LSquareBracket, RSquareBracket,
LAngleBracket, RAngleBracket,
LCurlyBrace, RCurlyBrace,
LSquareBracket, Comma, Period, Colon, Underscore,
RSquareBracket,
LCurlyBrace, Operator(Rc<String>),
RCurlyBrace, DigitGroup(Rc<String>), HexNumberSigil, BinNumberSigil,
Comma,
Period,
Colon,
Digit(u8),
StrLiteral(Rc<String>), StrLiteral(Rc<String>),
Identifier(Rc<String>), Identifier(Rc<String>),
Keyword(Kw), Keyword(Kw),
Operator(Rc<String>),
Error(String),
} }
#[derive(Debug)] #[derive(Debug)]
@ -31,18 +32,72 @@ pub enum Kw {
If, If,
Else, Else,
Func, Func,
For,
Loop, Loop,
} }
#[derive(Debug)] #[derive(Debug)]
pub struct Token { pub struct Token {
token_type: TokenType, token_type: TokenType,
line_number: u32, offset: usize,
char_number: u32,
} }
pub fn tokenize(_input: &str) -> Result<Vec<Token>, TokenError> { fn is_digit(c: &char) -> bool {
Ok(vec!()) c.is_digit(10)
}
type CharIter<'a> = Peekable<Enumerate<Chars<'a>>>;
/// Splits `input` into a flat list of `Token`s.
///
/// Each token's `offset` is the index of its first character as counted by
/// `input.chars().enumerate()`, i.e. a character index rather than a byte index.
///
/// Scanning rules:
/// * Whitespace other than `'\n'` is dropped; `'\n'` itself becomes `Newline`.
/// * `#` starts a comment that is discarded through the end of the line. The
///   terminating `'\n'` is consumed as part of the comment, so no `Newline`
///   token is emitted for it.
/// * `#` directly followed by `{` discards only the `#`; the `{` is tokenized
///   on the next iteration.
/// * A decimal digit hands control to `handle_digit`, which may consume more
///   characters from the stream.
///
/// The function as written has no failure path and always returns `Ok`.
pub fn tokenize(input: &str) -> Result<Vec<Token>, TokenError> {
    use self::TokenType::*;

    let mut chars: CharIter = input.chars().enumerate().peekable();
    let mut tokens: Vec<Token> = Vec::new();

    loop {
        let (position, ch) = match chars.next() {
            Some(pair) => pair,
            None => break,
        };

        // Horizontal whitespace carries no meaning; newlines are kept as tokens.
        if ch != '\n' && ch.is_whitespace() {
            continue;
        }

        if ch == '#' {
            let brace_follows = chars.peek().map_or(false, |&(_, next)| next == '{');
            if !brace_follows {
                // Line comment: discard everything up to and including the newline.
                for (_, skipped) in chars.by_ref() {
                    if skipped == '\n' {
                        break;
                    }
                }
            }
            continue;
        }

        let kind = match ch {
            '\n' => Newline,
            ';' => Semicolon,
            ':' => Colon,
            ',' => Comma,
            '_' => Underscore,
            '.' => Period,
            '(' => LParen,
            ')' => RParen,
            '{' => LCurlyBrace,
            '}' => RCurlyBrace,
            '<' => LAngleBracket,
            '>' => RAngleBracket,
            '[' => LSquareBracket,
            ']' => RSquareBracket,
            digit if is_digit(&digit) => handle_digit(digit, &mut chars),
            // NOTE(review): every other character (letters, quotes, operators, ...)
            // is reported as `RSquareBracket`. This looks like a placeholder until
            // the remaining token kinds are lexed -- confirm before relying on it.
            _ => RSquareBracket,
        };

        tokens.push(Token { token_type: kind, offset: position });
    }

    Ok(tokens)
}
/// Produces the token for a numeric lexeme whose first character is `c`.
///
/// `c` is the character the caller has already taken from `input`; `input`
/// is positioned just after it. Returns:
/// * `HexNumberSigil` when `c` is `'0'` and the next character is a lowercase `'x'`,
/// * `BinNumberSigil` when `c` is `'0'` and the next character is a lowercase `'b'`,
/// * otherwise `DigitGroup` holding `c` followed by every immediately
///   following decimal digit.
///
/// For the two sigil cases only the prefix letter is consumed; whatever
/// follows it is left in `input` for the caller to tokenize.
fn handle_digit(c: char, input: &mut CharIter) -> TokenType {
use self::TokenType::*;
// Peek rather than advance, so a non-matching character stays in the stream.
if c == '0' && input.peek().map_or(false, |&(_, c)| { c == 'x' }) {
// Consume the 'x' that was just peeked.
input.next();
HexNumberSigil
} else if c == '0' && input.peek().map_or(false, |&(_, c)| { c == 'b' }) {
// Consume the 'b' that was just peeked.
input.next();
BinNumberSigil
} else {
// Start the group with the digit the caller already consumed.
let mut buf = c.to_string();
// `peeking_take_while` (itertools) is expected to stop at the first non-digit
// without consuming it, so that character is still available to the caller.
buf.extend(input.peeking_take_while(|&(_, ref c)| is_digit(c)).map(|(_, c)| { c }));
DigitGroup(Rc::new(buf))
}
} }
/* /*
@ -79,7 +134,6 @@ prototype := identifier '(' identlist ')'
identlist := identifier (',' identifier)* | ε identlist := identifier (',' identifier)* | ε
declaration := FN prototype LCurlyBrace (statement)* RCurlyBrace declaration := FN prototype LCurlyBrace (statement)* RCurlyBrace
prototype := identifier LParen identlist RParen prototype := identifier LParen identlist RParen
identlist := Ident (Comma Ident)* | ε identlist := Ident (Comma Ident)* | ε