schala/src/parser.rs

541 lines
16 KiB
Rust
Raw Normal View History

2016-01-22 02:55:07 -08:00
use std::fmt;
2016-01-15 03:27:24 -08:00
use tokenizer::{Token, Kw, Op};
2016-12-31 03:35:46 -08:00
use std::collections::VecDeque;
use std::rc::Rc;
2015-12-24 22:01:59 -08:00
2016-12-29 02:04:03 -08:00
// Grammar
// program := (statement delimiter ?)*
// delimiter := Newline | Semicolon
// statement := declaration | expression
// declaration := Fn prototype (statement)* End
2016-12-29 02:04:03 -08:00
// prototype := identifier LParen identlist RParen
// identlist := Ident (Comma Ident)* | e
// exprlist := Expression (Comma Expression)* | e
//
// expression := primary_expression (op primary_expression)*
// primary_expression := Number | String | identifier_expr | paren_expr | conditional_expr
// identifier_expr := call_expr | Variable
// paren_expr := LParen expression RParen
// call_expr := Identifier LParen exprlist RParen
// conditional_expr := IF expression THEN (expression delimiter?)* (ELSE (expression delimiter?)*)? END
// op := '+', '-', etc.
//
2016-01-12 03:29:28 -08:00
/// A parsed program: its statements, in the order the parser pushed them.
pub type AST = Vec<Statement>;
2016-01-10 01:15:34 -08:00
#[derive(Debug, Clone)]
2017-01-03 02:45:36 -08:00
pub enum Statement {
2016-01-10 01:15:34 -08:00
ExprNode(Expression),
FuncDefNode(Function),
2016-01-10 01:15:34 -08:00
}
impl fmt::Display for Statement {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Statement::*;
match *self {
ExprNode(ref expr) => write!(f, "{}", expr),
FuncDefNode(_) => write!(f, "UNIMPLEMENTED"),
}
}
}
2016-01-10 01:15:34 -08:00
#[derive(Debug, Clone)]
pub struct Function {
pub prototype: Prototype,
2017-01-03 02:45:36 -08:00
pub body: Vec<Statement>,
2016-01-10 01:15:34 -08:00
}
2016-01-17 00:08:46 -08:00
/// The signature part of a function, per the grammar rule
/// `prototype := identifier LParen identlist RParen`.
#[derive(Debug, Clone, PartialEq)]
pub struct Prototype {
    /// The function's name.
    pub name: Rc<String>,
    /// The parameter names, in declaration order.
    pub parameters: Vec<Rc<String>>,
}
#[derive(Debug, Clone)]
pub enum Expression {
2016-01-21 19:10:21 -08:00
Null,
StringLiteral(Rc<String>),
Number(f64),
Variable(Rc<String>),
BinExp(Rc<String>, Box<Expression>, Box<Expression>),
Call(Rc<String>, Vec<Expression>),
2016-12-31 03:35:46 -08:00
Conditional(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
Lambda(Function),
2016-12-31 03:35:46 -08:00
Block(VecDeque<Expression>),
While(Box<Expression>, Vec<Expression>),
2016-01-10 01:15:34 -08:00
}
2016-01-22 02:55:07 -08:00
impl fmt::Display for Expression {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::Expression::*;
2017-01-02 22:17:21 -08:00
match *self {
Null => write!(f, "null"),
StringLiteral(ref s) => write!(f, "\"{}\"", s),
Number(n) => write!(f, "{}", n),
Lambda(Function { prototype: Prototype { ref name, ref parameters, .. }, .. }) => {
2017-01-02 22:15:14 -08:00
write!(f, "«function: {}, {} arg(s)»", name, parameters.len())
}
2016-01-22 02:55:07 -08:00
_ => write!(f, "UNIMPLEMENTED"),
}
}
}
2016-01-15 03:27:24 -08:00
/// Binding strength of a binary operator; a larger value binds more tightly.
type Precedence = u8;
2016-12-29 02:04:03 -08:00
// TODO make this support incomplete parses
2016-01-10 01:15:34 -08:00
/// Result type of every parsing routine: the parsed value or a `ParseError`.
pub type ParseResult<T> = Result<T, ParseError>;
2015-12-24 22:01:59 -08:00
#[derive(Debug)]
2016-01-10 01:15:34 -08:00
pub struct ParseError {
2016-12-29 02:04:03 -08:00
pub msg: String,
pub remaining_tokens: Vec<Token>,
}
2016-01-10 01:15:34 -08:00
impl ParseError {
fn result_from_str<T>(msg: &str) -> ParseResult<T> {
2017-01-01 19:45:27 -08:00
Err(ParseError {
msg: msg.to_string(),
remaining_tokens: vec![],
})
2016-01-10 01:15:34 -08:00
}
}
2015-12-25 02:03:11 -08:00
2016-01-12 01:58:12 -08:00
/// Parser state: the tokens that have not been consumed yet.
struct Parser {
// Stored in reverse source order, so the next token to read is the last
// element and consuming it is a `pop`.
tokens: Vec<Token>,
}
2016-01-12 01:58:12 -08:00
impl Parser {
fn initialize(tokens: &[Token]) -> Parser {
let mut tokens = tokens.to_vec();
tokens.reverse();
Parser { tokens: tokens }
}
fn peek(&self) -> Option<Token> {
2016-01-12 01:58:12 -08:00
self.tokens.last().map(|x| x.clone())
}
2016-12-29 02:04:03 -08:00
fn next(&mut self) -> Option<Token> {
2016-01-12 03:26:28 -08:00
self.tokens.pop()
2016-01-12 01:58:12 -08:00
}
2016-01-15 03:27:24 -08:00
fn get_precedence(&self, op: &Op) -> Precedence {
match &op.0[..] {
2016-01-15 03:27:24 -08:00
"+" => 10,
"-" => 10,
"*" => 20,
"/" => 20,
"%" => 20,
2016-12-27 01:39:22 -08:00
"=" => 1,
"==" => 40,
">" | ">=" | "<" | "<=" => 30,
2016-01-15 03:27:24 -08:00
_ => 255,
}
}
}
// Consumes the next token of `$self_` if it matches the pattern `$token`.
// Otherwise makes the *enclosing function* return a `ParseError` saying what
// was expected and what was found (a token, or end of input).
macro_rules! expect {
    ($self_:expr, $token:pat) => {
        match $self_.peek() {
            Some($token) => {
                $self_.next();
            }
            unexpected => {
                //TODO implement Display for token
                //TODO make this not require 2 stringifications
                let err = match unexpected {
                    Some(x) => {
                        format!("Expected `{:?}` but got `{:?}`", stringify!($token), x)
                    }
                    None => {
                        format!("Expected `{:?}` but got end of input", stringify!($token))
                    }
                };
                return ParseError::result_from_str(&err);
            }
        }
    }
}
2016-01-16 20:23:43 -08:00
// Consumes the next token of `$self_` if it is an `Identifier` and evaluates
// to the identifier's payload. Otherwise makes the *enclosing function*
// return a `ParseError`. The `Identifier` variant must be in scope where the
// macro is invoked.
macro_rules! expect_identifier {
    ($self_:expr) => {
        if let Some(Identifier(ident)) = $self_.peek() {
            $self_.next();
            ident
        } else {
            return ParseError::result_from_str("Expected Identifier");
        }
    }
}
2016-01-12 04:04:14 -08:00
fn is_delimiter(token: &Token) -> bool {
use tokenizer::Token::*;
match *token {
Newline | Semicolon => true,
2016-12-29 02:04:03 -08:00
_ => false,
2016-01-12 04:04:14 -08:00
}
}
2016-01-12 01:58:12 -08:00
impl Parser {
fn program(&mut self) -> ParseResult<AST> {
let mut ast = Vec::new(); //TODO have this come from previously-parsed tree
loop {
2017-01-03 02:45:36 -08:00
let result: ParseResult<Statement> = match self.peek() {
2016-12-29 02:04:03 -08:00
Some(ref t) if is_delimiter(&t) => {
self.next();
continue;
}
2016-01-17 00:50:23 -08:00
Some(_) => self.statement(),
None => break,
2016-01-12 01:58:12 -08:00
};
match result {
Ok(node) => ast.push(node),
Err(mut err) => {
err.remaining_tokens = self.tokens.clone();
err.remaining_tokens.reverse();
2017-01-01 19:45:27 -08:00
return Err(err);
}
2016-01-12 01:58:12 -08:00
}
}
2016-01-12 01:58:12 -08:00
Ok(ast)
}
2017-01-03 02:45:36 -08:00
fn statement(&mut self) -> ParseResult<Statement> {
2016-01-12 01:58:12 -08:00
use tokenizer::Token::*;
2017-01-03 02:45:36 -08:00
let node: Statement = match self.peek() {
2016-01-17 00:50:23 -08:00
Some(Keyword(Kw::Fn)) => try!(self.declaration()),
2017-01-03 02:45:36 -08:00
Some(_) => Statement::ExprNode(try!(self.expression())),
2016-01-17 00:50:23 -08:00
None => panic!("unexpected end of tokens"),
2016-01-12 01:58:12 -08:00
};
2016-01-12 01:58:12 -08:00
Ok(node)
}
2017-01-03 02:45:36 -08:00
fn declaration(&mut self) -> ParseResult<Statement> {
2016-01-12 01:58:12 -08:00
use tokenizer::Token::*;
expect!(self, Keyword(Kw::Fn));
2016-01-12 01:58:12 -08:00
let prototype = try!(self.prototype());
2017-01-03 02:45:36 -08:00
let body: Vec<Statement> = try!(self.body());
expect!(self, Keyword(Kw::End));
2017-01-03 02:45:36 -08:00
Ok(Statement::FuncDefNode(Function {
2016-12-29 02:04:03 -08:00
prototype: prototype,
body: body,
}))
2016-01-12 01:58:12 -08:00
}
fn prototype(&mut self) -> ParseResult<Prototype> {
use tokenizer::Token::*;
let name: Rc<String> = expect_identifier!(self);
expect!(self, LParen);
let parameters: Vec<Rc<String>> = try!(self.identlist());
expect!(self, RParen);
2016-12-29 02:04:03 -08:00
Ok(Prototype {
name: name,
parameters: parameters,
})
2016-01-12 01:58:12 -08:00
}
fn identlist(&mut self) -> ParseResult<Vec<Rc<String>>> {
2016-01-12 01:58:12 -08:00
use tokenizer::Token::*;
let mut args: Vec<Rc<String>> = Vec::new();
2016-01-17 01:17:54 -08:00
while let Some(Identifier(name)) = self.peek() {
args.push(name.clone());
2016-01-17 01:17:54 -08:00
self.next();
if let Some(Comma) = self.peek() {
self.next();
} else {
break;
2016-01-12 01:58:12 -08:00
}
}
2016-01-12 01:58:12 -08:00
Ok(args)
}
2016-01-16 20:23:43 -08:00
fn exprlist(&mut self) -> ParseResult<Vec<Expression>> {
use tokenizer::Token::*;
let mut args: Vec<Expression> = Vec::new();
loop {
if let Some(RParen) = self.peek() {
break;
}
let exp = try!(self.expression());
args.push(exp);
if let Some(Comma) = self.peek() {
self.next();
} else {
break;
}
}
Ok(args)
}
2017-01-03 02:45:36 -08:00
fn body(&mut self) -> ParseResult<Vec<Statement>> {
2016-01-12 01:58:12 -08:00
use tokenizer::Token::*;
let mut statements = Vec::new();
2016-01-12 04:04:14 -08:00
loop {
match self.peek() {
2016-12-29 02:04:03 -08:00
Some(ref t) if is_delimiter(t) => {
self.next();
continue;
}
2016-01-12 04:04:14 -08:00
Some(Keyword(Kw::End)) => break,
_ => {
let ast_node = try!(self.statement());
statements.push(ast_node);
2016-01-12 04:04:14 -08:00
}
}
}
Ok(statements)
2016-01-12 01:58:12 -08:00
}
2016-01-12 04:04:14 -08:00
fn expression(&mut self) -> ParseResult<Expression> {
2016-01-15 03:27:24 -08:00
let lhs: Expression = try!(self.primary_expression());
self.precedence_expr(lhs, 0)
}
2016-12-29 02:04:03 -08:00
fn precedence_expr(&mut self,
mut lhs: Expression,
min_precedence: u8)
-> ParseResult<Expression> {
2016-01-15 03:27:24 -08:00
use tokenizer::Token::*;
while let Some(Operator(op)) = self.peek() {
let precedence = self.get_precedence(&op);
if precedence < min_precedence {
break;
}
self.next();
let mut rhs = try!(self.primary_expression());
while let Some(Operator(ref op)) = self.peek() {
if self.get_precedence(op) > precedence {
let new_prec = self.get_precedence(op);
rhs = try!(self.precedence_expr(rhs, new_prec));
} else {
break;
}
}
lhs = Expression::BinExp(op.0, Box::new(lhs), Box::new(rhs));
}
2016-01-15 03:27:24 -08:00
Ok(lhs)
2016-01-13 23:29:18 -08:00
}
fn primary_expression(&mut self) -> ParseResult<Expression> {
use tokenizer::Token::*;
2016-01-17 00:50:23 -08:00
Ok(match self.peek() {
2016-12-29 02:04:03 -08:00
Some(Keyword(Kw::Null)) => {
self.next();
Expression::Null
}
Some(NumLiteral(n)) => {
self.next();
Expression::Number(n)
}
Some(StrLiteral(s)) => {
self.next();
Expression::StringLiteral(s)
}
Some(Keyword(Kw::If)) => try!(self.conditional_expr()),
Some(Keyword(Kw::While)) => try!(self.while_expr()),
2016-12-29 02:04:03 -08:00
Some(Identifier(_)) => try!(self.identifier_expr()),
Some(Token::LParen) => try!(self.paren_expr()),
Some(e) => {
2017-01-01 19:45:27 -08:00
return ParseError::result_from_str(&format!("Expected primary expression, got \
{:?}",
e));
}
2016-12-29 02:04:03 -08:00
None => return ParseError::result_from_str("Expected primary expression received EoI"),
2016-01-17 00:50:23 -08:00
})
2016-01-12 01:58:12 -08:00
}
2016-01-15 01:04:54 -08:00
fn while_expr(&mut self) -> ParseResult<Expression> {
use tokenizer::Token::*;
use self::Expression::*;
expect!(self, Keyword(Kw::While));
let test = try!(self.expression());
let mut body = Vec::new();
loop {
match self.peek() {
None |
Some(Keyword(Kw::End)) => break,
Some(Semicolon) | Some(Newline) => {
self.next();
continue;
}
_ => {
let exp = try!(self.expression());
body.push(exp);
}
}
}
expect!(self, Keyword(Kw::End));
Ok(While(Box::new(test), body))
}
fn conditional_expr(&mut self) -> ParseResult<Expression> {
use tokenizer::Token::*;
2016-12-31 03:35:46 -08:00
use self::Expression::*;
expect!(self, Keyword(Kw::If));
let test = try!(self.expression());
loop {
match self.peek() {
Some(ref t) if is_delimiter(t) => {
self.next();
continue;
}
_ => break,
}
}
expect!(self, Keyword(Kw::Then));
2016-12-31 03:35:46 -08:00
let mut then_block = VecDeque::new();
loop {
match self.peek() {
2017-01-01 19:45:27 -08:00
None |
Some(Keyword(Kw::Else)) |
Some(Keyword(Kw::End)) => break,
Some(Semicolon) | Some(Newline) => {
self.next();
continue;
}
_ => {
let exp = try!(self.expression());
2016-12-31 03:35:46 -08:00
then_block.push_back(exp);
}
}
}
let else_block = if let Some(Keyword(Kw::Else)) = self.peek() {
self.next();
2016-12-31 03:35:46 -08:00
let mut else_exprs = VecDeque::new();
loop {
match self.peek() {
2017-01-01 19:45:27 -08:00
None |
Some(Keyword(Kw::End)) => break,
Some(Semicolon) | Some(Newline) => {
self.next();
continue;
}
_ => {
let exp = try!(self.expression());
2016-12-31 03:35:46 -08:00
else_exprs.push_back(exp);
}
}
}
Some(else_exprs)
} else {
None
};
expect!(self, Keyword(Kw::End));
2017-01-01 19:45:27 -08:00
Ok(Conditional(Box::new(test),
Box::new(Block(then_block)),
else_block.map(|list| Box::new(Block(list)))))
}
2016-01-16 20:23:43 -08:00
fn identifier_expr(&mut self) -> ParseResult<Expression> {
use tokenizer::Token::*;
let name = expect_identifier!(self);
let expr = match self.peek() {
Some(LParen) => {
let args = try!(self.call_expr());
Expression::Call(name, args)
2016-12-29 02:04:03 -08:00
}
__ => Expression::Variable(name),
2016-01-16 20:23:43 -08:00
};
Ok(expr)
}
fn call_expr(&mut self) -> ParseResult<Vec<Expression>> {
2016-01-16 17:00:54 -08:00
use tokenizer::Token::*;
expect!(self, LParen);
2016-01-17 00:16:01 -08:00
let args: Vec<Expression> = try!(self.exprlist());
expect!(self, RParen);
2016-01-16 20:23:43 -08:00
Ok(args)
2016-01-15 01:04:54 -08:00
}
fn paren_expr(&mut self) -> ParseResult<Expression> {
expect!(self, Token::LParen);
2016-01-15 01:15:57 -08:00
let expr = try!(self.expression());
expect!(self, Token::RParen);
2016-01-15 01:15:57 -08:00
Ok(expr)
2016-01-15 01:04:54 -08:00
}
}
2017-01-03 02:45:36 -08:00
pub fn parse(tokens: &[Token], _parsed_tree: &[Statement]) -> ParseResult<AST> {
2016-01-12 01:58:12 -08:00
let mut parser = Parser::initialize(tokens);
parser.program()
2015-12-25 02:03:11 -08:00
}
#[cfg(test)]
mod tests {
    use tokenizer;
    use super::*;
    use super::Statement::*;
    use super::Expression::*;

    // Tokenizes and parses `$input`, then checks that the resulting AST slice
    // matches the pattern `$output` and satisfies the guard `$ifexpr` (which
    // may use the bindings introduced by the pattern).
    macro_rules! parsetest {
        ($input:expr, $output:pat, $ifexpr:expr) => {
            {
                let tokens = tokenizer::tokenize($input).unwrap();
                let ast = parse(&tokens, &[]).unwrap();
                match &ast[..] {
                    $output if $ifexpr => (),
                    x => panic!("Error in parse test, got {:?} instead", x)
                }
            }
        }
    }

    // Names in the AST are `Rc<String>`, which cannot be compared to a string
    // literal directly; the guards below compare through `as_str()`.

    #[test]
    fn call_parse_test() {
        parsetest!(
            "fn a() 1 + 2 end",
            &[FuncDefNode(Function {prototype: Prototype { ref name, ref parameters }, ref body})],
            match &body[..] { &[ExprNode(BinExp(_, box Number(1.0), box Number(2.0)))] => true, _ => false }
            && name.as_str() == "a" && parameters.is_empty()
        );

        parsetest!(
            "fn a(x,y) 1 + 2 end",
            &[FuncDefNode(Function {prototype: Prototype { ref name, ref parameters }, ref body})],
            match &body[..] { &[ExprNode(BinExp(_, box Number(1.0), box Number(2.0)))] => true, _ => false }
            && name.as_str() == "a"
            && parameters.iter().map(|p| p.as_str()).collect::<Vec<_>>() == ["x", "y"]
        );
    }

    #[test]
    fn expression_parse_test() {
        parsetest!("a", &[ExprNode(Variable(ref s))], s.as_str() == "a");
        parsetest!("a + b",
            &[ExprNode(BinExp(ref plus, box Variable(ref a), box Variable(ref b)))],
            plus.as_str() == "+" && a.as_str() == "a" && b.as_str() == "b");
        // Multiplication binds tighter than addition, on either side.
        parsetest!("a + b * c",
            &[ExprNode(BinExp(ref plus, box Variable(ref a), box BinExp(ref mul, box Variable(ref b), box Variable(ref c))))],
            plus.as_str() == "+" && mul.as_str() == "*"
            && a.as_str() == "a" && b.as_str() == "b" && c.as_str() == "c");
        parsetest!("a * b + c",
            &[ExprNode(BinExp(ref plus, box BinExp(ref mul, box Variable(ref a), box Variable(ref b)), box Variable(ref c)))],
            plus.as_str() == "+" && mul.as_str() == "*"
            && a.as_str() == "a" && b.as_str() == "b" && c.as_str() == "c");
        // Parentheses override precedence.
        parsetest!("(a + b) * c",
            &[ExprNode(BinExp(ref mul, box BinExp(ref plus, box Variable(ref a), box Variable(ref b)), box Variable(ref c)))],
            plus.as_str() == "+" && mul.as_str() == "*"
            && a.as_str() == "a" && b.as_str() == "b" && c.as_str() == "c");
    }

    #[test]
    fn conditional_parse_test() {
        // The same conditional written on one line and spread over several
        // lines must produce the same tree shape.
        let sources = ["if null then 20 else 40 end",
                       "if null\nthen\n20\nelse\n40\nend"];
        for source in sources.iter() {
            let tokens = tokenizer::tokenize(source).unwrap();
            match parse(&tokens, &[]).unwrap()[..] {
                [ExprNode(Conditional(box Null, box Block(_), Some(box Block(_))))] => (),
                _ => panic!(),
            }
        }
    }
}