schala/src/parser.rs

300 lines
8.3 KiB
Rust
Raw Normal View History

2016-01-15 03:27:24 -08:00
use tokenizer::{Token, Kw, Op};
2015-12-24 22:01:59 -08:00
2016-01-12 03:29:28 -08:00
/* Grammar
program := (statement delimiter ?)*
delimiter := Newline | Semicolon
statement := declaration | expression
declaration := Fn prototype (statement)* End
prototype := identifier LParen identlist RParen
identlist := Ident (Comma Ident)* | e
expression := primary_expression (op primary_expression)*
primary_expression := Variable | Number | String | call_expr | paren_expr
paren_expr := LParen expression RParen
call_expr := identifier LParen identlist RParen
op := '+', '-', etc.
*/
2016-01-10 01:15:34 -08:00
/// A top-level item produced by the parser.
#[derive(Debug, Clone, PartialEq)]
pub enum ASTNode {
    /// A free-standing expression.
    ExprNode(Expression),
    /// A function declaration.
    FuncNode(Function),
}

/// A function declaration: its prototype plus the expressions of its body.
#[derive(Debug, Clone, PartialEq)]
pub struct Function {
    /// Name and parameter names of the function.
    pub prototype: Prototype,
    /// Body expressions, in source order.
    pub body: Vec<Expression>,
}

/// The name and parameter list of a function.
#[derive(Debug, Clone, PartialEq)]
pub struct Prototype {
    /// Function name.
    pub name: String,
    /// Parameter names, in declaration order.
    pub args: Vec<String>,
}

/// An expression tree.
///
/// `PartialEq` (but not `Eq`) is derived because `Number` holds an `f64`;
/// structural comparison lets tests use `assert_eq!` on parsed trees.
#[derive(Debug, Clone, PartialEq)]
pub enum Expression {
    /// A string literal.
    StringLiteral(String),
    /// A numeric literal.
    Number(f64),
    /// A reference to a variable by name.
    Variable(String),
    /// A binary operation: operator text, left operand, right operand.
    BinExp(String, Box<Expression>, Box<Expression>),
    /// A call expression.
    // NOTE(review): presumably (callee name, arguments); nothing in this file
    // constructs this variant yet, so confirm once call parsing exists.
    Call(String, Vec<Expression>),
}
/// A parsed program: its top-level nodes in the order they were parsed.
pub type AST = Vec<ASTNode>;
2016-01-15 03:27:24 -08:00
/// Binding strength of a binary operator; a larger value binds more tightly.
type Precedence = u8;
2016-01-10 01:15:34 -08:00
/// Result of a parsing step: the parsed value, or a `ParseError`.
//TODO make this support incomplete parses
pub type ParseResult<T> = Result<T, ParseError>;
2015-12-24 22:01:59 -08:00
/// An error raised while parsing, carrying a human-readable message.
///
/// `Clone` and `PartialEq` are derived so errors can be stored and compared
/// directly (e.g. in tests).
#[derive(Debug, Clone, PartialEq)]
pub struct ParseError {
    /// Description of what the parser expected or found.
    pub msg: String,
}
2016-01-10 01:15:34 -08:00
impl ParseError {
fn result_from_str<T>(msg: &str) -> ParseResult<T> {
2016-01-10 01:15:34 -08:00
Err(ParseError { msg: msg.to_string() })
}
}
2015-12-25 02:03:11 -08:00
2016-01-12 01:58:12 -08:00
/// Parser state: the tokens that have not been consumed yet.
struct Parser {
// Stored in reverse order by `initialize`, so the next token to consume is
// the last element (`peek` reads it with `last`, `next` removes it with `pop`).
tokens: Vec<Token>,
}
2016-01-12 01:58:12 -08:00
impl Parser {
fn initialize(tokens: &[Token]) -> Parser {
let mut tokens = tokens.to_vec();
tokens.reverse();
Parser { tokens: tokens }
}
fn peek(&self) -> Option<Token> {
2016-01-12 01:58:12 -08:00
self.tokens.last().map(|x| x.clone())
}
2016-01-12 03:26:28 -08:00
fn next(&mut self) -> Option<Token>{
self.tokens.pop()
2016-01-12 01:58:12 -08:00
}
2016-01-15 03:27:24 -08:00
fn get_precedence(&self, op: &Op) -> Precedence {
2016-01-15 03:27:24 -08:00
match &op.repr[..] {
"+" => 10,
"-" => 10,
"*" => 20,
"/" => 20,
"%" => 20,
_ => 255,
}
}
}
/// Consumes the next token if it matches the pattern `$token`; otherwise makes
/// the *enclosing function* return a `ParseError` built from `$error`.
///
/// `$self_` must provide `peek()` and `next()`, and the enclosing function
/// must return a `ParseResult`.
macro_rules! expect {
    ($self_:expr, $token:pat, $error:expr) => {
        if let Some($token) = $self_.peek() {
            $self_.next();
        } else {
            return ParseError::result_from_str($error);
        }
    }
}
2016-01-12 04:04:14 -08:00
fn is_delimiter(token: &Token) -> bool {
use tokenizer::Token::*;
match *token {
Newline | Semicolon => true,
_ => false
}
}
2016-01-12 01:58:12 -08:00
impl Parser {
fn program(&mut self) -> ParseResult<AST> {
use tokenizer::Token::*;
let mut ast = Vec::new(); //TODO have this come from previously-parsed tree
loop {
let cur_tok = match self.peek() {
Some(t) => t.clone(),
None => break
};
let result: ParseResult<ASTNode> = match cur_tok {
2016-01-12 04:04:14 -08:00
ref t if is_delimiter(&t) => { self.next(); continue},
2016-01-12 01:58:12 -08:00
_ => self.statement()
};
match result {
Ok(node) => ast.push(node),
Err(err) => return Err(err)
}
}
2016-01-12 01:58:12 -08:00
Ok(ast)
}
2016-01-12 01:58:12 -08:00
fn statement(&mut self) -> ParseResult<ASTNode> {
use tokenizer::Token::*;
let cur_tok: Token = self.peek().unwrap().clone();
let node: ASTNode = match cur_tok {
Keyword(Kw::Fn) => try!(self.declaration()),
2016-01-12 04:04:14 -08:00
_ => ASTNode::ExprNode(try!(self.expression())),
2016-01-12 01:58:12 -08:00
};
2016-01-12 01:58:12 -08:00
Ok(node)
}
2016-01-12 01:58:12 -08:00
fn declaration(&mut self) -> ParseResult<ASTNode> {
use tokenizer::Token::*;
expect!(self, Fn, "Expected 'fn'");
let prototype = try!(self.prototype());
2016-01-12 03:29:03 -08:00
let body: Vec<Expression> = try!(self.body());
2016-01-12 03:26:28 -08:00
expect!(self, Keyword(Kw::End), "Expected 'end'");
2016-01-12 03:29:03 -08:00
Ok(ASTNode::FuncNode(Function { prototype: prototype, body: body } ))
2016-01-12 01:58:12 -08:00
}
/// Parses a function prototype: an identifier followed by a parenthesised
/// list of parameter names.
///
/// Returns a `ParseError` if the name or either parenthesis is missing.
fn prototype(&mut self) -> ParseResult<Prototype> {
    use tokenizer::Token::*;
    let name = if let Some(Identifier(ident)) = self.peek() {
        self.next();
        ident
    } else {
        return ParseError::result_from_str("Expected identifier");
    };
    expect!(self, LParen, "Expected '('");
    let args = try!(self.identlist());
    expect!(self, RParen, "Expected ')'");
    Ok(Prototype { name: name, args: args })
}
/// Parses a possibly empty, comma-separated list of identifiers and returns
/// their names in order.
///
/// Stops at the first token that is not an identifier, leaving that token
/// unconsumed. A comma after the last identifier is consumed and accepted.
/// This function never returns an error.
fn identlist(&mut self) -> ParseResult<Vec<String>> {
    use tokenizer::Token::*;
    let mut names: Vec<String> = Vec::new();
    while let Some(Identifier(name)) = self.peek() {
        names.push(name);
        self.next();
        match self.peek() {
            Some(Comma) => {
                self.next();
            }
            _ => break,
        }
    }
    Ok(names)
}
2016-01-12 03:29:03 -08:00
fn body(&mut self) -> ParseResult<Vec<Expression>> {
2016-01-12 01:58:12 -08:00
use tokenizer::Token::*;
2016-01-12 04:04:14 -08:00
let mut exprs = Vec::new();
loop {
match self.peek() {
Some(ref t) if is_delimiter(t) => { self.next(); continue},
Some(Keyword(Kw::End)) => break,
_ => {
let expr = try!(self.expression());
exprs.push(expr);
}
}
}
Ok(exprs)
2016-01-12 01:58:12 -08:00
}
2016-01-12 04:04:14 -08:00
fn expression(&mut self) -> ParseResult<Expression> {
2016-01-12 01:58:12 -08:00
use tokenizer::Token::*;
2016-01-15 03:27:24 -08:00
let lhs: Expression = try!(self.primary_expression());
self.precedence_expr(lhs, 0)
}
fn precedence_expr(&mut self, mut lhs: Expression, min_precedence: u8) -> ParseResult<Expression> {
2016-01-15 03:27:24 -08:00
use tokenizer::Token::*;
while let Some(Operator(op)) = self.peek() {
let precedence = self.get_precedence(&op);
if precedence < min_precedence {
break;
}
self.next();
let mut rhs = try!(self.primary_expression());
while let Some(Operator(ref op)) = self.peek() {
if self.get_precedence(op) > precedence {
let new_prec = self.get_precedence(op);
rhs = try!(self.precedence_expr(rhs, new_prec));
} else {
break;
}
}
lhs = Expression::BinExp(op.repr, Box::new(lhs), Box::new(rhs));
}
2016-01-15 03:27:24 -08:00
Ok(lhs)
2016-01-13 23:29:18 -08:00
}
fn primary_expression(&mut self) -> ParseResult<Expression> {
use tokenizer::Token::*;
let expr = match self.peek() {
Some(NumLiteral(n)) => { self.next(); Expression::Number(n) },
Some(StrLiteral(s)) => { self.next(); Expression::StringLiteral(s) },
2016-01-15 01:04:54 -08:00
Some(Identifier(var)) => {
self.next();
match self.peek() {
Some(Token::LParen) => try!(self.call_expr()),
2016-01-15 01:04:54 -08:00
_ => Expression::Variable(var)
}
},
Some(Token::LParen) => { try!(self.paren_expr()) }
2016-01-15 01:04:54 -08:00
Some(x) => return ParseError::result_from_str("Expected primary expression"),
None => return ParseError::result_from_str("Expected primary expression received EoI")
2016-01-12 03:26:28 -08:00
};
2016-01-13 23:29:18 -08:00
2016-01-12 04:04:14 -08:00
Ok(expr)
2016-01-12 01:58:12 -08:00
}
2016-01-15 01:04:54 -08:00
fn call_expr(&mut self) -> ParseResult<Expression> {
unimplemented!()
}
fn paren_expr(&mut self) -> ParseResult<Expression> {
expect!(self, Token::LParen, "Expected LParen");
2016-01-15 01:15:57 -08:00
let expr = try!(self.expression());
expect!(self, Token::RParen, "Expected LParen");
2016-01-15 01:15:57 -08:00
Ok(expr)
2016-01-15 01:04:54 -08:00
}
}
2016-01-12 01:58:12 -08:00
/// Parses `tokens` into an AST.
///
/// `_parsed_tree` is currently unused. Returns the first `ParseError`
/// encountered, if any.
pub fn parse(tokens: &[Token], _parsed_tree: &[ASTNode]) -> ParseResult<AST> {
    Parser::initialize(tokens).program()
}
// Unit tests for the parser; they run the tokenizer and the parser together.
#[cfg(test)]
mod tests {
use tokenizer;
use super::*;
// Tokenizes and parses `$input` (with an empty previously-parsed tree), then
// requires the resulting node slice to match the pattern `$output` with the
// guard `$ifexpr` holding; anything else panics and prints the parsed nodes.
// Tokenizer or parser failures also panic, through `unwrap`.
macro_rules! parsetest {
($input:expr, $output:pat, $ifexpr:expr) => {
{
let tokens = tokenizer::tokenize($input).unwrap();
let ast = parse(&tokens, &[]).unwrap();
match &ast[..] {
$output if $ifexpr => (),
x => panic!("Error in parse test, got {:?} instead", x)
}
}
}
}
#[test]
fn parse_test() {
// A lone identifier parses to a single variable expression.
parsetest!("a", [ASTNode::ExprNode(Expression::Variable(ref s))], s == "a");
// A single binary operation parses to one BinExp node holding both operands.
parsetest!("a + b",
[ASTNode::ExprNode(Expression::BinExp(ref plus, box Expression::Variable(ref a), box Expression::Variable(ref b)))],
plus == "+" && a == "a" && b == "b");
}
}