schala/src/schala_lang/parsing.rs

887 lines
25 KiB
Rust
Raw Normal View History

2017-09-06 05:09:20 -07:00
extern crate itertools;
2017-09-07 19:38:22 -07:00
use std::collections::HashMap;
2017-08-29 05:08:09 -07:00
use std::rc::Rc;
2017-09-06 05:09:20 -07:00
use std::iter::{Enumerate, Peekable};
use self::itertools::Itertools;
2017-09-09 01:25:11 -07:00
use std::vec::IntoIter;
2017-09-06 05:09:20 -07:00
use std::str::Chars;
2017-08-29 04:27:07 -07:00
2017-09-09 01:25:11 -07:00
#[derive(Debug, PartialEq, Clone)]
2017-09-04 12:17:20 -07:00
pub enum TokenType {
2017-09-06 05:09:20 -07:00
Newline, Semicolon,
2017-09-04 12:17:20 -07:00
2017-09-06 05:09:20 -07:00
LParen, RParen,
LSquareBracket, RSquareBracket,
LAngleBracket, RAngleBracket,
LCurlyBrace, RCurlyBrace,
2017-09-07 22:29:23 -07:00
Pipe,
2017-09-04 12:17:20 -07:00
2017-09-06 05:09:20 -07:00
Comma, Period, Colon, Underscore,
2017-09-04 12:17:20 -07:00
2017-09-06 05:09:20 -07:00
Operator(Rc<String>),
DigitGroup(Rc<String>), HexNumberSigil, BinNumberSigil,
2017-08-29 05:08:09 -07:00
StrLiteral(Rc<String>),
Identifier(Rc<String>),
2017-09-04 12:17:20 -07:00
Keyword(Kw),
2017-09-06 05:09:20 -07:00
2017-09-11 02:07:17 -07:00
EOF,
2017-09-06 05:09:20 -07:00
Error(String),
2017-08-29 05:08:09 -07:00
}
2017-09-11 03:13:19 -07:00
use self::TokenType::*;
2017-08-29 05:08:09 -07:00
/// Reserved words of the language.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Kw {
    If,
    Else,
    Func,
    For,
    Match,
    Var,
    Const,
    Let,
    In,
    Alias,
    Type,
    SelfType,
    SelfIdent,
    Trait,
    Impl,
    True,
    False,
}
2017-09-11 03:13:19 -07:00
use self::Kw::*;
2017-09-04 12:17:20 -07:00
2017-09-07 19:38:22 -07:00
lazy_static! {
    /// Maps the source spelling of each reserved word to its `Kw` variant.
    static ref KEYWORDS: HashMap<&'static str, Kw> = {
        let table = [
            ("if", Kw::If),
            ("else", Kw::Else),
            ("fn", Kw::Func),
            ("for", Kw::For),
            ("match", Kw::Match),
            ("var", Kw::Var),
            ("const", Kw::Const),
            ("let", Kw::Let),
            ("in", Kw::In),
            ("alias", Kw::Alias),
            ("type", Kw::Type),
            ("Self", Kw::SelfType),
            ("self", Kw::SelfIdent),
            ("trait", Kw::Trait),
            ("impl", Kw::Impl),
            ("true", Kw::True),
            ("false", Kw::False),
        ];
        table.iter().cloned().collect()
    };
}
2017-09-04 12:17:20 -07:00
#[derive(Debug)]
pub struct Token {
token_type: TokenType,
2017-09-06 05:09:20 -07:00
offset: usize,
2017-09-04 12:17:20 -07:00
}
impl Token {
pub fn get_error(&self) -> Option<&String> {
match self.token_type {
TokenType::Error(ref s) => Some(s),
_ => None,
}
}
}
2017-09-06 05:09:20 -07:00
/// Returns `true` when `c` is one of the decimal digits `0` through `9`.
fn is_digit(c: &char) -> bool {
    c.to_digit(10).is_some()
}
2017-09-17 02:38:11 -07:00
/// Characters that may appear inside an operator token.
const OPERATOR_CHARS: [char; 19] = [
    '!', '$', '%', '&', '*', '+', '-', '.', '/', ':',
    '<', '>', '=', '?', '@', '^', '|', '~', '`',
];

/// Returns `true` when `c` is one of `OPERATOR_CHARS`.
fn is_operator(c: &char) -> bool {
    OPERATOR_CHARS.contains(c)
}
2017-09-06 05:09:20 -07:00
// Character stream consumed by the tokenizer: each item pairs a `char` with
// its index from `enumerate`, and `Peekable` provides one item of lookahead.
type CharIter<'a> = Peekable<Enumerate<Chars<'a>>>;
pub fn tokenize(input: &str) -> Vec<Token> {
2017-09-06 05:09:20 -07:00
let mut tokens: Vec<Token> = Vec::new();
let mut input: CharIter = input.chars().enumerate().peekable();
while let Some((idx, c)) = input.next() {
let cur_tok_type = match c {
'#' => {
if let Some(&(_, '{')) = input.peek() {
} else {
while let Some((_, c)) = input.next() {
if c == '\n' {
break;
}
}
}
continue;
},
2017-09-07 22:29:23 -07:00
c if c.is_whitespace() && c != '\n' => continue,
2017-09-06 05:09:20 -07:00
'\n' => Newline, ';' => Semicolon,
2017-09-17 04:26:44 -07:00
':' => Colon, ',' => Comma,
2017-09-06 05:09:20 -07:00
'(' => LParen, ')' => RParen,
'{' => LCurlyBrace, '}' => RCurlyBrace,
'[' => LSquareBracket, ']' => RSquareBracket,
2017-09-06 09:42:29 -07:00
'"' => handle_quote(&mut input),
2017-09-06 05:09:20 -07:00
c if is_digit(&c) => handle_digit(c, &mut input),
2017-09-08 01:33:27 -07:00
c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input), //TODO I'll probably have to rewrite this if I care about types being uppercase, also type parameterization
2017-09-17 02:38:11 -07:00
c if is_operator(&c) => handle_operator(c, &mut input),
unknown => Error(format!("Unexpected character: {}", unknown)),
2017-09-06 05:09:20 -07:00
};
tokens.push(Token { token_type: cur_tok_type, offset: idx });
}
tokens
2017-09-06 05:09:20 -07:00
}
/// Tokenizes something that starts with the digit `c`.
///
/// `0x` and `0b` become the corresponding sigil token (consuming the letter);
/// anything else becomes a `DigitGroup` holding `c` plus every decimal digit
/// that immediately follows.
fn handle_digit(c: char, input: &mut CharIter) -> TokenType {
    if c == '0' {
        match input.peek().map(|&(_, next)| next) {
            Some('x') => {
                input.next();
                return HexNumberSigil;
            },
            Some('b') => {
                input.next();
                return BinNumberSigil;
            },
            _ => (),
        }
    }

    let mut digits = String::new();
    digits.push(c);
    // Consume digits one at a time, leaving the first non-digit in the stream.
    while let Some(next) = input.peek().map(|&(_, next)| next) {
        if !is_digit(&next) {
            break;
        }
        input.next();
        digits.push(next);
    }
    DigitGroup(Rc::new(digits))
}
2017-09-06 09:42:29 -07:00
fn handle_quote(input: &mut CharIter) -> TokenType {
2017-09-06 16:52:49 -07:00
let mut buf = String::new();
2017-09-07 00:18:36 -07:00
loop {
match input.next().map(|(_, c)| { c }) {
Some('"') => break,
Some('\\') => {
let next = input.peek().map(|&(_, c)| { c });
if next == Some('n') {
input.next();
buf.push('\n')
} else if next == Some('"') {
input.next();
buf.push('"');
} else if next == Some('t') {
input.next();
buf.push('\t');
}
},
Some(c) => buf.push(c),
None => return TokenType::Error(format!("Unclosed string")),
2017-09-06 16:52:49 -07:00
}
}
TokenType::StrLiteral(Rc::new(buf))
2017-09-06 09:42:29 -07:00
}
fn handle_alphabetic(c: char, input: &mut CharIter) -> TokenType {
2017-09-07 19:38:22 -07:00
let mut buf = String::new();
buf.push(c);
2017-09-08 01:33:27 -07:00
if c == '_' && input.peek().map(|&(_, c)| { !c.is_alphabetic() }).unwrap_or(true) {
2017-09-11 02:07:17 -07:00
return TokenType::Underscore
2017-09-08 01:33:27 -07:00
}
2017-09-07 19:38:22 -07:00
loop {
match input.peek().map(|&(_, c)| { c }) {
Some(c) if c.is_alphanumeric() => {
input.next();
buf.push(c);
},
_ => break,
}
}
match KEYWORDS.get(buf.as_str()) {
Some(kw) => TokenType::Keyword(kw.clone()),
None => TokenType::Identifier(Rc::new(buf)),
}
2017-09-06 09:42:29 -07:00
}
fn handle_operator(c: char, input: &mut CharIter) -> TokenType {
2017-09-17 02:38:11 -07:00
match c {
2017-09-17 04:26:44 -07:00
'<' | '>' | '|' | '.' => {
2017-09-17 02:38:11 -07:00
let ref next = input.peek().map(|&(_, c)| { c });
if !next.map(|n| { is_operator(&n) }).unwrap_or(false) {
return match c {
'<' => LAngleBracket,
'>' => RAngleBracket,
'|' => Pipe,
2017-09-17 04:26:44 -07:00
'.' => Period,
2017-09-17 02:38:11 -07:00
_ => unreachable!(),
}
}
},
_ => (),
};
2017-09-07 22:29:23 -07:00
let mut buf = String::new();
buf.push(c);
loop {
match input.peek().map(|&(_, c)| { c }) {
2017-09-17 02:38:11 -07:00
Some(c) if is_operator(&c) => {
2017-09-07 22:29:23 -07:00
input.next();
buf.push(c);
},
_ => break
}
}
TokenType::Operator(Rc::new(buf))
2017-09-06 09:42:29 -07:00
}
#[cfg(test)]
mod schala_tokenizer_tests {
    use super::*;

    macro_rules! digit { ($text:expr) => { DigitGroup(Rc::new($text.to_string())) } }
    macro_rules! ident { ($text:expr) => { Identifier(Rc::new($text.to_string())) } }
    macro_rules! op { ($text:expr) => { Operator(Rc::new($text.to_string())) } }

    /// Tokenizes `source` and keeps only the token types.
    fn token_types(source: &str) -> Vec<TokenType> {
        tokenize(source).into_iter().map(|token| token.token_type).collect()
    }

    #[test]
    fn tokens() {
        let expected = vec![
            Keyword(Let),
            ident!("a"),
            Colon,
            ident!("A"),
            LAngleBracket,
            ident!("B"),
            RAngleBracket,
            op!("="),
            ident!("c"),
            op!("++"),
            ident!("d"),
        ];
        assert_eq!(token_types("let a: A<B> = c ++ d"), expected);
    }

    #[test]
    fn underscores() {
        assert_eq!(token_types("4_8"), vec![digit!("4"), Underscore, digit!("8")]);
    }
}
2017-09-06 09:42:29 -07:00
2017-08-29 05:08:09 -07:00
/*
2017-09-11 02:07:17 -07:00
Schala (PROVISIONAL!!) EBNF grammar
2017-08-30 04:28:52 -07:00
'' = literal, all other symbols are nonterminals
2017-08-29 05:08:09 -07:00
program := (statement delimiter ?)*
2017-08-30 04:28:52 -07:00
delimiter := 'Newline' | ';'
2017-08-29 05:08:09 -07:00
statement := declaration | expression
2017-08-30 04:28:52 -07:00
declaration := module | function | type_decl
type_decl := 'type' type_format
type_format := 'alias' '=' type | type_constructor
type_constructor := capital_ident '=' type_rhs
type_rhs := struct_decl | type_variant ('|' type_variant)*
struct_decl := 'struct' '{' (ident ':' type)* '}'
2017-08-30 04:28:52 -07:00
type_variant := capital_ident | tuple_type | capital_ident struct_decl
tuple_type := // something like Variant(a,b)
type := // something like Type[A[b]]
ascription := expression (':' type)+
function := 'fn' prototype '{' (statement)* '}'
prototype := identifier '(' identlist ')'
identlist := identifier (',' identifier)* | ε
2017-08-29 05:08:09 -07:00
declaration := FN prototype LCurlyBrace (statement)* RCurlyBrace
prototype := identifier LParen identlist RParen
identlist := Ident (Comma Ident)* | ε
exprlist := Expression (Comma Expression)* | ε
itemlist := Ident COLON Expression (Comma Ident COLON Expression)* | ε
expression := postop_expression (op postop_expression)*
postop_expression := primary_expression postop
primary_expression := number_expr | String | identifier_expr | paren_expr | conditional_expr | while_expr | lambda_expr | list_expr | struct_expr
number_expr := (PLUS | MINUS ) number_expr | Number
identifier_expr := call_expression | Variable
list_expr := LSquareBracket exprlist RSquareBracket
struct_expr := LCurlyBrace itemlist RCurlyBrace
call_expression := Identifier LParen exprlist RParen
while_expr := WHILE primary_expression LCurlyBrace (expression delimiter)* RCurlyBrace
paren_expr := LParen expression RParen
conditional_expr := IF expression LCurlyBrace (expression delimiter)* RCurlyBrace (LCurlyBrace (expression delimiter)* RCurlyBrace)?
lambda_expr := FN LParen identlist RParen LCurlyBrace (expression delimiter)* RCurlyBrace
lambda_call := | LParen exprlist RParen
postop := ε | LParen exprlist RParen | LBracket expression RBracket
op := '+', '-', etc.
*/
2017-09-11 02:07:17 -07:00
2017-09-17 00:04:27 -07:00
/* for reference, here is the scala EBNF for expressions:
* see http://scala-lang.org/files/archive/spec/2.12/06-expressions.html
Expr ::= (Bindings | id | _) => Expr
| Expr1
Expr1 ::= if ( Expr ) {nl} Expr [[semi] else Expr]
| while ( Expr ) {nl} Expr
| try { Block } [catch { CaseClauses }]
[finally Expr]
| do Expr [semi] while ( Expr )
| for (( Enumerators ) | { Enumerators })
{nl} [yield] Expr
| throw Expr
| return [Expr]
| [SimpleExpr .] id = Expr
| SimpleExpr1 ArgumentExprs = Expr
| PostfixExpr
| PostfixExpr Ascription
| PostfixExpr match { CaseClauses }
PrefixExpr ::= [- | + | ~ | !] SimpleExpr
*/
2017-09-11 02:07:17 -07:00
/* Schala EBNF Grammar */
2017-09-17 00:04:27 -07:00
/* Terminal productions are in 'single quotes' or UPPERCASE if they are a class
2017-09-13 22:40:05 -07:00
* or not representable in ASCII
2017-09-11 02:07:17 -07:00
program := (statement delimiter)* EOF
2017-09-17 00:04:27 -07:00
delimiter := NEWLINE | ';'
2017-09-11 02:07:17 -07:00
statement := expression | declaration
2017-09-13 22:40:05 -07:00
declaration := type_alias | type_declaration | func_declaration
2017-09-11 02:07:17 -07:00
2017-09-17 00:04:27 -07:00
type_alias := 'alias' IDENTIFIER '=' IDENTIFIER
type_declaration := 'type' IDENTIFIER '=' type_body
type_body := variant_specifier ('|' variant_specifier)*
variant_specifier := '{' member_list '}'
2017-09-13 22:40:05 -07:00
member_list := (IDENTIFIER type_anno)*
2017-09-11 20:37:19 -07:00
2017-09-17 00:04:27 -07:00
func_declaration := 'fn' IDENTIFIER '(' param_list ')'
param_list := (IDENTIFIER type_anno+ ',')*
2017-09-11 02:07:17 -07:00
2017-09-17 00:04:27 -07:00
type_anno := ':' type
2017-09-11 23:16:37 -07:00
2017-09-12 02:30:27 -07:00
expression := precedence_expr
precedence_expr := primary
primary := literal | paren_expr | identifier_expr
2017-09-13 22:40:05 -07:00
paren_expr := LParen expression RParen
identifier_expr := call_expr | index_expr | IDENTIFIER
2017-09-17 00:04:27 -07:00
literal := 'true' | 'false' | number_literal | STR_LITERAL
2017-09-11 02:07:17 -07:00
2017-09-17 00:04:27 -07:00
call_expr := IDENTIFIER '(' expr_list ')' //TODO maybe make this optional? or no, have a bare identifier meant to be used as method taken care of in eval
index_expr := '[' (expression (',' expression)* | ε) ']'
expr_list := expression (',' expression)* | ε
2017-09-11 15:42:49 -07:00
2017-09-11 02:07:17 -07:00
// a float_literal can still be assigned to an int in type-checking
number_literal := int_literal | float_literal
2017-09-13 22:40:05 -07:00
int_literal := ('0x' | '0b') digits | digits
float_literal := digits ('.' digits)
digits := (DIGIT_GROUP underscore)+
2017-09-11 02:07:17 -07:00
*/
2017-09-09 01:25:11 -07:00
// Token stream consumed by the parser: owns the tokens and provides one
// token of lookahead through `Peekable`.
type TokenIter = Peekable<IntoIter<Token>>;
2017-09-11 03:21:07 -07:00
#[derive(Debug)]
2017-09-11 02:07:17 -07:00
/// Error produced when parsing fails.
pub struct ParseError {
    /// Description of what went wrong.
    pub msg: String,
}

/// Result type returned by the parsing routines.
pub type ParseResult<T> = Result<T, ParseError>;

impl ParseError {
    /// Builds an `Err` wrapping a `ParseError` with the given message, typed
    /// for whatever `ParseResult<T>` the caller needs to return.
    fn new<T>(msg: &str) -> ParseResult<T> {
        let error = ParseError { msg: String::from(msg) };
        Err(error)
    }
}
#[derive(Debug)]
2017-09-17 05:06:58 -07:00
// One entry of the parse trace, recorded each time a production is entered.
pub struct ParseRecord {
// Name of the production (parser method) that was entered.
production_name: String,
// `Debug` rendering of the token that was next when the production started.
next_token: String,
}
2017-09-08 16:42:42 -07:00
struct Parser {
2017-09-09 01:25:11 -07:00
tokens: TokenIter,
parse_record: Vec<ParseRecord>,
2017-09-09 01:25:11 -07:00
}
impl Parser {
fn new(input: Vec<Token>) -> Parser {
Parser { tokens: input.into_iter().peekable(), parse_record: vec![] }
2017-09-09 01:25:11 -07:00
}
2017-09-11 02:07:17 -07:00
fn peek(&mut self) -> TokenType {
self.tokens.peek().map(|ref t| { t.token_type.clone() }).unwrap_or(TokenType::EOF)
2017-09-09 01:27:15 -07:00
}
2017-09-11 02:07:17 -07:00
fn next(&mut self) -> TokenType {
self.tokens.next().map(|ref t| { t.token_type.clone() }).unwrap_or(TokenType::EOF)
2017-09-09 01:25:11 -07:00
}
2017-09-08 16:42:42 -07:00
}
2017-09-11 02:07:17 -07:00
// Consumes the next token when it matches the given pattern (optionally with
// an `if` guard) and evaluates to that token; otherwise returns early from
// the enclosing function with a `ParseError` carrying the given message.
macro_rules! expect {
    ($self:expr, $token_type:pat, $message:expr) => {
        match $self.peek() {
            $token_type => $self.next(),
            _ => return ParseError::new(&$message.to_string()),
        }
    };
    ($self:expr, $token_type:pat if $cond:expr, $message:expr) => {
        match $self.peek() {
            $token_type if $cond => $self.next(),
            _ => return ParseError::new(&$message.to_string()),
        }
    }
}
2017-09-11 03:21:07 -07:00
#[derive(Debug, PartialEq)]
2017-09-08 16:42:42 -07:00
// Root of the syntax tree: the statements of a program, in the order parsed.
pub struct AST(Vec<Statement>);
// A top-level item of a program: either an expression or a declaration.
#[derive(Debug, PartialEq)]
pub enum Statement {
Expression(Expression),
Declaration(Declaration),
}
2017-09-13 23:04:45 -07:00
// Name of a function parameter.
type ParamName = Rc<String>;
// Name of a type.
type TypeName = Rc<String>;
// Parameters of a function declaration, each with an optional type name.
type FormalParamList = Vec<(ParamName, Option<TypeName>)>;
2017-09-08 16:42:42 -07:00
#[derive(Debug, PartialEq)]
pub enum Declaration {
2017-09-13 23:04:45 -07:00
FuncDecl {
name: Rc<String>,
params: FormalParamList,
},
2017-09-13 22:40:05 -07:00
TypeDecl(Rc<String>, TypeBody),
TypeAlias(Rc<String>, Rc<String>)
2017-09-11 15:42:49 -07:00
}
#[derive(Debug, PartialEq)]
2017-09-13 22:40:05 -07:00
// Right-hand side of a type declaration: the list of its variants.
pub struct TypeBody(Vec<Variant>);
/// One variant in the body of a type declaration.
#[derive(Debug, PartialEq)]
pub enum Variant {
    /// A variant consisting of just a name.
    Singleton(Rc<String>),
    //ArgumentConstructor,
    //Record
}
#[derive(Debug, PartialEq)]
pub enum Expression {
IntLiteral(u64),
2017-09-08 16:42:42 -07:00
FloatLiteral(f64),
2017-09-16 15:57:48 -07:00
StringLiteral(Rc<String>),
2017-09-17 04:31:27 -07:00
BoolLiteral(bool),
BinExp(Operation, Box<Expression>, Box<Expression>),
Variable(Rc<String>),
Call {
name: Rc<String>,
params: Vec<Expression>,
},
Index {
indexee: Box<Expression>,
indexers: Vec<Expression>,
}
2017-09-12 02:30:27 -07:00
}
/// The operator of a binary expression, identified by its spelling.
#[derive(Debug, PartialEq)]
pub struct Operation {
    op: Rc<String>
}

impl Operation {
    /// Precedence lower than that of any operator; the starting point for
    /// parsing a complete expression.
    fn min_precedence() -> i32 {
        i32::min_value()
    }

    /// Returns the binding strength of the operator spelled `op`, decided by
    /// its first character: `+`/`-` bind loosest, `*`/`/`/`%` tighter, and
    /// everything else tightest.
    ///
    /// An empty `op` gets the default (tightest) precedence instead of
    /// panicking.
    fn get_precedence(op: &str) -> i32 {
        match op.chars().next() {
            Some('+') | Some('-') => 10,
            Some('*') | Some('/') | Some('%') => 20,
            _ => 30,
        }
    }
}
// Defines a parser method that first appends a `ParseRecord` (the method's
// name plus the `Debug` form of the upcoming token) to the trace and then
// runs the given body.
macro_rules! parse_method {
    ($name:ident(&mut $self:ident) -> $type:ty $body:block) => {
        fn $name(&mut $self) -> $type {
            let upcoming = format!("{:?}", $self.peek());
            $self.parse_record.push(ParseRecord {
                production_name: stringify!($name).to_string(),
                next_token: upcoming,
            });
            $body
        }
    };
}
2017-09-11 02:07:17 -07:00
impl Parser {
parse_method!(program(&mut self) -> ParseResult<AST> {
let mut statements = Vec::new();
loop {
match self.peek() {
EOF => break,
Newline | Semicolon => {
self.next();
continue;
},
_ => statements.push(self.statement()?),
}
}
Ok(AST(statements))
});
2017-09-11 02:07:17 -07:00
parse_method!(statement(&mut self) -> ParseResult<Statement> {
2017-09-11 02:07:17 -07:00
//TODO handle error recovery here
match self.peek() {
2017-09-13 22:40:05 -07:00
Keyword(Alias) => self.type_alias().map(|alias| { Statement::Declaration(alias) }),
2017-09-11 02:07:17 -07:00
Keyword(Type) => self.type_declaration().map(|decl| { Statement::Declaration(decl) }),
Keyword(Func)=> self.func_declaration().map(|func| { Statement::Declaration(func) }),
_ => self.expression().map(|expr| { Statement::Expression(expr) } ),
}
});
2017-09-11 02:07:17 -07:00
parse_method!(type_alias(&mut self) -> ParseResult<Declaration> {
2017-09-13 22:40:05 -07:00
expect!(self, Keyword(Alias), "Expected 'alias'");
let alias = self.identifier()?;
expect!(self, Operator(ref c) if **c == "=", "Expected '='");
let original = self.identifier()?;
Ok(Declaration::TypeAlias(alias, original))
});
2017-09-13 22:40:05 -07:00
parse_method!(type_declaration(&mut self) -> ParseResult<Declaration> {
2017-09-11 15:42:49 -07:00
expect!(self, Keyword(Type), "Expected 'type'");
let name = self.identifier()?;
2017-09-13 22:40:05 -07:00
expect!(self, Operator(ref c) if **c == "=", "Expected '='");
let body = self.type_body()?;
Ok(Declaration::TypeDecl(name, body))
});
2017-09-13 22:40:05 -07:00
parse_method!(type_body(&mut self) -> ParseResult<TypeBody> {
2017-09-13 22:40:05 -07:00
let variant = Variant::Singleton(self.identifier()?);
Ok(TypeBody(vec!(variant)))
});
2017-09-11 02:07:17 -07:00
parse_method!(func_declaration(&mut self) -> ParseResult<Declaration> {
2017-09-11 20:37:19 -07:00
expect!(self, Keyword(Func), "Expected 'fn'");
let name = self.identifier()?;
expect!(self, LParen, "Expected '('");
2017-09-13 23:04:45 -07:00
let params = self.param_list()?;
2017-09-11 20:37:19 -07:00
expect!(self, RParen, "Expected ')'");
2017-09-13 23:04:45 -07:00
let decl = Declaration::FuncDecl {
name: name,
params: params
};
Ok(decl)
});
2017-09-11 20:37:19 -07:00
parse_method!(param_list(&mut self) -> ParseResult<FormalParamList> {
2017-09-11 20:37:19 -07:00
Ok(vec!())
});
2017-09-11 02:07:17 -07:00
parse_method!(expression(&mut self) -> ParseResult<Expression> {
self.precedence_expr(Operation::min_precedence())
});
// this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
fn precedence_expr(&mut self, precedence: i32) -> ParseResult<Expression> {
use self::Expression::*;
2017-09-17 05:06:58 -07:00
let next_token = self.peek();
let record = ParseRecord {
production_name: "precedence_expr".to_string(),
next_token: format!("{:?}", next_token),
};
self.parse_record.push(record);
//TODO clean this up
let mut lhs = self.primary()?;
loop {
2017-09-17 05:12:20 -07:00
let new_precedence = match self.peek() {
Operator(op) => Operation::get_precedence(&*op),
Period => Operation::get_precedence("."),
_ => break,
};
2017-09-17 05:12:20 -07:00
if precedence >= new_precedence {
break;
}
let op_str = match self.next() {
Operator(op) => op,
2017-09-17 04:26:44 -07:00
Period => Rc::new(".".to_string()),
_ => unreachable!(),
};
let rhs = self.precedence_expr(new_precedence)?;
let operation = Operation { op: op_str };
lhs = BinExp(operation, Box::new(lhs), Box::new(rhs));
}
2017-09-12 02:30:27 -07:00
Ok(lhs)
2017-09-11 02:07:17 -07:00
}
parse_method!(primary(&mut self) -> ParseResult<Expression> {
2017-09-13 03:46:16 -07:00
match self.peek() {
LParen => self.paren_expr(),
Identifier(_) => self.identifier_expr(),
2017-09-13 03:46:16 -07:00
_ => self.literal(),
}
});
2017-09-13 03:46:16 -07:00
parse_method!(paren_expr(&mut self) -> ParseResult<Expression> {
2017-09-13 03:46:16 -07:00
expect!(self, LParen, "Expected '('");
let expr = self.expression()?;
expect!(self, RParen, "Expected ')'");
Ok(expr)
});
2017-09-11 15:42:49 -07:00
fn identifier_expr(&mut self) -> ParseResult<Expression> {
let identifier = self.identifier()?;
match self.peek() {
LParen => {
2017-09-15 03:49:47 -07:00
let call_params = self.call_expr()?;
Ok(Expression::Call {
name: identifier,
params: call_params,
})
},
LSquareBracket => {
let indexers = self.index_expr()?;
Ok(Expression::Index {
indexee: Box::new(Expression::Variable(identifier)),
indexers: indexers,
})
}
_ => Ok(Expression::Variable(identifier))
}
}
parse_method!(call_expr(&mut self) -> ParseResult<Vec<Expression>> {
2017-09-15 03:49:47 -07:00
let mut exprs = Vec::new();
expect!(self, LParen, "Expected '('");
loop {
if let RParen = self.peek() {
break;
}
exprs.push(self.expression()?);
match self.peek() {
Comma => { self.next(); },
_ => break,
}
}
expect!(self, RParen, "Expected ')'");
Ok(exprs)
});
parse_method!(index_expr(&mut self) -> ParseResult<Vec<Expression>> {
expect!(self, LSquareBracket, "Expected '['");
let mut exprs = Vec::new();
loop {
if let RSquareBracket = self.peek() {
break;
}
exprs.push(self.expression()?);
match self.peek() {
Comma => { self.next(); }
_ => break,
};
}
expect!(self, RSquareBracket, "Expected ']'");
Ok(exprs)
});
parse_method!(identifier(&mut self) -> ParseResult<Rc<String>> {
2017-09-11 15:42:49 -07:00
match self.next() {
Identifier(s) => Ok(s),
p => ParseError::new(&format!("Expected an identifier, got {:?}", p)),
}
});
2017-09-11 02:07:17 -07:00
parse_method!(literal(&mut self) -> ParseResult<Expression> {
2017-09-11 02:07:17 -07:00
match self.peek() {
DigitGroup(_) | HexNumberSigil | BinNumberSigil | Period => self.number_literal(),
2017-09-17 04:31:27 -07:00
Keyword(Kw::True) => { self.next(); Ok(Expression::BoolLiteral(true)) },
Keyword(Kw::False) => { self.next(); Ok(Expression::BoolLiteral(false)) },
2017-09-16 15:57:48 -07:00
StrLiteral(s) => {
self.next();
Ok(Expression::StringLiteral(s))
}
e => ParseError::new(&format!("Expected a literal expression, got {:?}", e)),
2017-09-11 02:07:17 -07:00
}
});
parse_method!(number_literal(&mut self) -> ParseResult<Expression> {
2017-09-11 02:07:17 -07:00
match self.peek() {
HexNumberSigil | BinNumberSigil => self.int_literal(),
_ => self.float_literal(),
}
});
2017-09-11 02:07:17 -07:00
parse_method!(int_literal(&mut self) -> ParseResult<Expression> {
2017-09-11 02:07:17 -07:00
use self::Expression::*;
match self.next() {
BinNumberSigil => {
2017-09-11 23:27:15 -07:00
let digits = self.digits()?;
let n = parse_binary(digits)?;
Ok(IntLiteral(n))
2017-09-11 02:07:17 -07:00
},
HexNumberSigil => {
2017-09-16 17:44:06 -07:00
ParseError::new("Not implemented")
2017-09-11 02:07:17 -07:00
},
_ => return ParseError::new("Expected '0x' or '0b'"),
}
});
2017-09-11 02:07:17 -07:00
parse_method!(float_literal(&mut self) -> ParseResult<Expression> {
2017-09-11 02:07:17 -07:00
use self::Expression::*;
2017-09-11 02:38:27 -07:00
let mut digits = self.digits()?;
if let TokenType::Period = self.peek() {
self.next();
digits.push_str(".");
digits.push_str(&self.digits()?);
match digits.parse::<f64>() {
Ok(f) => Ok(FloatLiteral(f)),
2017-09-16 17:44:06 -07:00
Err(e) => ParseError::new(&format!("Float failed to parse with error: {}", e)),
2017-09-11 02:38:27 -07:00
}
} else {
match digits.parse::<u64>() {
Ok(d) => Ok(IntLiteral(d)),
2017-09-16 17:44:06 -07:00
Err(e) => ParseError::new(&format!("Integer failed to parse with error: {}", e)),
2017-09-11 02:38:27 -07:00
}
2017-09-11 02:07:17 -07:00
}
});
2017-09-11 02:07:17 -07:00
parse_method!(digits(&mut self) -> ParseResult<String> {
2017-09-11 02:07:17 -07:00
let mut ds = String::new();
loop {
2017-09-11 02:38:27 -07:00
match self.peek() {
Underscore => { self.next(); continue; },
DigitGroup(ref s) => { self.next(); ds.push_str(s)},
2017-09-11 02:07:48 -07:00
_ => break,
2017-09-11 02:07:17 -07:00
}
}
Ok(ds)
});
2017-09-11 02:07:17 -07:00
}
2017-09-09 00:31:15 -07:00
2017-09-11 23:27:15 -07:00
/// Converts a string of `0`/`1` characters (most significant bit first) into
/// the number it denotes. An empty string yields `0`.
///
/// # Errors
///
/// Returns a `ParseError` when `digits` contains any other character, or when
/// the value does not fit in a `u64`.
fn parse_binary(digits: String) -> ParseResult<u64> {
    let mut result: u64 = 0;
    for d in digits.chars() {
        let bit = match d {
            '0' => 0,
            '1' => 1,
            _ => return ParseError::new("Encountered a character not '1' or '0' while parsing a binary literal"),
        };
        // Checked arithmetic: a literal with too many significant bits is an
        // error rather than an overflow panic or a silently wrapped value.
        result = match result.checked_mul(2).and_then(|shifted| shifted.checked_add(bit)) {
            Some(value) => value,
            None => return ParseError::new("Binary literal is too large to fit in 64 bits"),
        };
    }
    Ok(result)
}
2017-09-16 14:29:22 -07:00
pub fn parse(input: Vec<Token>) -> (Result<AST, ParseError>, Vec<String>) {
2017-09-09 00:31:15 -07:00
let mut parser = Parser::new(input);
let ast = parser.program();
2017-09-17 05:06:58 -07:00
let trace = parser.parse_record.into_iter().map(|r| {
format!("Production `{}`, token: {:?}", r.production_name, r.next_token)
}).collect();
(ast, trace)
2017-08-29 05:08:09 -07:00
}
2017-09-11 03:21:07 -07:00
#[cfg(test)]
mod parse_tests {
    use super::*;
    use super::Statement::*;
    use super::Declaration::*;
    use super::Expression::*;

    /// Tokenizes and parses `source`, panicking when parsing fails.
    fn parse_source(source: &str) -> AST {
        parse(tokenize(source)).0.unwrap()
    }

    // Builds an `Rc<String>` from a bare token, e.g. `rc!(oi)`.
    macro_rules! rc {
        ($name:tt) => { Rc::new(stringify!($name).to_string()) }
    }
    // Asserts that parsing the source text yields exactly the given AST.
    macro_rules! parse_test {
        ($source:expr, $expected:expr) => { assert_eq!(parse_source($source), $expected) }
    }
    macro_rules! binexp {
        ($operation:expr, $left:expr, $right:expr) => { BinExp($operation, Box::new($left), Box::new($right)) }
    }
    macro_rules! op {
        ($spelling:expr) => { Operation { op: Rc::new($spelling.to_string()) } }
    }
    macro_rules! var {
        ($name:expr) => { Variable(Rc::new($name.to_string())) }
    }

    #[test]
    fn parsing_number_literals_and_binexps() {
        parse_test!(".2", AST(vec![Expression(FloatLiteral(0.2))]));
        parse_test!("8.1", AST(vec![Expression(FloatLiteral(8.1))]));
        parse_test!("0b010", AST(vec![Expression(IntLiteral(2))]));
        parse_test!("3; 4; 4.3", AST(vec![
            Expression(IntLiteral(3)),
            Expression(IntLiteral(4)),
            Expression(FloatLiteral(4.3)),
        ]));

        parse_test!("1 + 2 * 3", AST(vec![
            Expression(binexp!(op!("+"),
                IntLiteral(1),
                binexp!(op!("*"), IntLiteral(2), IntLiteral(3)))),
        ]));
        parse_test!("1 * 2 + 3", AST(vec![
            Expression(binexp!(op!("+"),
                binexp!(op!("*"), IntLiteral(1), IntLiteral(2)),
                IntLiteral(3))),
        ]));
        parse_test!("1 && 2", AST(vec![
            Expression(binexp!(op!("&&"), IntLiteral(1), IntLiteral(2))),
        ]));
        parse_test!("1 + 2 * 3 + 4", AST(vec![
            Expression(binexp!(op!("+"),
                binexp!(op!("+"),
                    IntLiteral(1),
                    binexp!(op!("*"), IntLiteral(2), IntLiteral(3))),
                IntLiteral(4))),
        ]));

        parse_test!("(1 + 2) * 3", AST(vec![
            Expression(binexp!(op!("*"),
                binexp!(op!("+"), IntLiteral(1), IntLiteral(2)),
                IntLiteral(3))),
        ]));
        parse_test!(".1 + .2", AST(vec![
            Expression(binexp!(op!("+"), FloatLiteral(0.1), FloatLiteral(0.2))),
        ]));
    }

    #[test]
    fn parsing_identifiers() {
        parse_test!("a", AST(vec![Expression(var!("a"))]));
        parse_test!("a + b", AST(vec![Expression(binexp!(op!("+"), var!("a"), var!("b")))]));
        //parse_test!("a[b]", AST(vec![Expression(
        //parse_test!("a[]", <- TODO THIS NEEDS TO FAIL
        //parse_test!(damn()[a] ,<- TODO needs to succeed
        parse_test!("a[b,c]", AST(vec![
            Expression(Index {
                indexee: Box::new(var!("a")),
                indexers: vec![var!("b"), var!("c")],
            }),
        ]));
    }

    #[test]
    fn parse_complicated_operators() {
        parse_test!("a <- b", AST(vec![Expression(binexp!(op!("<-"), var!("a"), var!("b")))]));
        parse_test!("a || b", AST(vec![Expression(binexp!(op!("||"), var!("a"), var!("b")))]));
        parse_test!("a<>b", AST(vec![Expression(binexp!(op!("<>"), var!("a"), var!("b")))]));
        parse_test!("a.b.c.d", AST(vec![
            Expression(binexp!(op!("."),
                binexp!(op!("."),
                    binexp!(op!("."), var!("a"), var!("b")),
                    var!("c")),
                var!("d"))),
        ]));
    }

    #[test]
    fn parsing_functions() {
        parse_test!("fn oi()", AST(vec![Declaration(FuncDecl { name: rc!(oi), params: vec![] })]));
        parse_test!("oi()", AST(vec![Expression(Call { name: rc!(oi), params: vec![] })]));
        parse_test!("oi(a, 2 + 2)", AST(vec![
            Expression(Call {
                name: rc!(oi),
                params: vec![var!("a"), binexp!(op!("+"), IntLiteral(2), IntLiteral(2))],
            }),
        ]));
    }

    #[test]
    fn parse_bools() {
        parse_test!("false", AST(vec![Expression(BoolLiteral(false))]));
        parse_test!("true", AST(vec![Expression(BoolLiteral(true))]));
    }

    #[test]
    fn parsing_strings() {
        parse_test!(r#""hello""#, AST(vec![Expression(StringLiteral(rc!(hello)))]));
    }

    #[test]
    fn parsing_types() {
        parse_test!("type Yolo = Yolo", AST(vec![
            Declaration(TypeDecl(rc!(Yolo), TypeBody(vec![Variant::Singleton(rc!(Yolo))]))),
        ]));
        parse_test!("alias Sex = Drugs", AST(vec![Declaration(TypeAlias(rc!(Sex), rc!(Drugs)))]));
    }
}