schala/schala-lang/language/src/parsing.rs
2019-09-20 02:21:39 -07:00

1268 lines
41 KiB
Rust

//! # Parsing
//! This module is where the recursive-descent parsing methods live.
//!
//!
//! # Schala EBNF Grammar
//! This document is the authoritative grammar of Schala, represented in something approximating
//! Extended Backus-Naur form. Terminal productions are in "double quotes", or UPPERCASE
//! if they represent a class of tokens rather than a specific string, or are otherwise
//! unrepresentable in ASCII.
//!
//! ## Top level structure
//!
//! ```text
//! program := (statement delimiter)* EOF
//! delimiter := NEWLINE | ";"
//! statement := expression | declaration
//! block := "{" (statement delimiter)* "}"
//! declaration := type_declaration | func_declaration | binding_declaration | interface_declaration | impl_declaration
//! ```
//! ## Declarations
//!
//! ### Types
//! ```text
//! type_declaration := "type" type_declaration_body
//! type_declaration_body := "alias" type_alias | "mut"? type_singleton_name "=" type_body
//! type_alias := IDENTIFIER "=" type_name
//! type_body := variant_specifier ("|" variant_specifier)*
//! variant_specifier := IDENTIFIER | IDENTIFIER "{" typed_identifier_list "}" | IDENTIFIER "(" type_name* ")"
//! typed_identifier_list := typed_identifier*
//! typed_identifier := IDENTIFIER type_anno
//! ```
//! ### Functions
//!
//! ```text
//! func_declaration := func_signature func_body
//! func_body := ε | nonempty_func_body
//! nonempty_func_body := "{" (statement delimiter)* "}"
//! func_signature := "fn" func_name formal_param_list type_anno+
//! func_name := IDENTIFIER | operator
//! formal_param_list := "(" (formal_param ",")* ")"
//! formal_param := IDENTIFIER type_anno+
//! ```
//!
//! ### Variable bindings
//! ```text
//! binding_declaration := "let" "mut"? IDENTIFIER "=" expression
//! ```
//!
//! ### Interfaces
//!
//! ```text
//! interface_declaration := "interface" type_singleton_name signature_block
//! impl_declaration := "impl" type_singleton_name decl_block | "impl" type_singleton_name "for" type_name decl_block
//! decl_block := "{" (func_declaration)* "}"
//! signature_block := "{" (func_signature)* "}"
//! ```
//!
//! ### Type Annotations
//!
//! ```text
//! type_anno := ":" type_name
//! type_name := type_singleton_name | "(" type_names ")"
//! type_names := ε | type_name (, type_name)*
//! type_singleton_name = IDENTIFIER (type_params)*
//! type_params := "<" type_name (, type_name)* ">"
//! ```
//!
//! ## Expressions
//! ```text
//! expression := precedence_expr type_anno+
//! precedence_expr := prefix_expr
//! prefix_expr := prefix_op call_expr
//! prefix_op := "+" | "-" | "!" | "~"
//! call_expr := index_expr ( "(" invocation_list ")" )* | ε
//! invocation_list := invocation_argument ("," invocation_argument)* | ε
//! invocation_argument := expression | IDENTIFIER "=" expression | "_"
//! index_expr := primary ( "[" (expression ("," (expression)* | ε) "]" )*
//! primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
//! expr_or_block := "{" (statement delimiter)* "}" | expr
//! ```
//!
//! ### Primary expressions
//!
//! ```text
//! list_expr := "[" (expression, ",")* "]"
//! lambda_expr := "\\" lambda_param_list type_anno+ nonempty_func_body
//! lambda_param_list := formal_param_list | formal_param
//! paren_expr := "(" paren_inner ")"
//! paren_inner := (expression ",")*
//! identifier_expr := qualified_identifier | named_struct
//! qualified_identifier := IDENTIFIER ("::" IDENTIFIER)*
//! ```
//!
//! ## Literals
//! ```text
//! literal := "true" | "false" | number_literal | STR_LITERAL
//! named_struct := qualified_identifier record_block
//! record_block := "{" (record_entry, ",")* "}" //TODO support anonymous structs, update syntax
//! record_entry := IDENTIFIER ":" expression
//! anonymous_struct := TODO
//! number_literal := int_literal | float_literal
//! int_literal = ("0x" | "0b") digits
//! float_literal := digits ("." digits)
//! digits := (digit_group underscore*)+
//! digit_group := DIGIT+
//! ```
//!
//! ### Patterns
//! ```text
//! pattern := "(" (pattern, ",")* ")" | simple_pattern
//! simple_pattern := pattern_literal | record_pattern | tuple_struct_pattern
//! pattern_literal := "true" | "false" | signed_number_literal | STR_LITERAL | qualified_identifier
//! signed_number_literal := "-"? number_literal
//! record_pattern := qualified_identifier "{" (record_pattern_entry, ",")* "}"
//! record_pattern_entry := IDENTIFIER | IDENTIFIER ":" Pattern
//! tuple_struct_pattern := qualified_identifier "(" (pattern, ",")* ")"
//! ```
//!
//! ### If-expressions
//! ```text
//! if_expr := "if" discriminator ("then" conditional | "is" simple_pattern_match | guard_block)
//! discriminator := precedence_expr (operator)+
//! conditional := expr_or_block else_clause
//! simple_pattern_match := pattern "then" conditional
//! else_clause := ε | "else" expr_or_block
//! guard_block := "{" (guard_arm, ",")* "}"
//! guard_arm := guard "->" expr_or_block
//! guard := "is" pattern | (operator)+ precedence_expr
//! ```
//!
//! ### While expressions
//! ```text
//! while_expr := "while" while_cond "{" (statement delimiter)* "}"
//! while_cond := ε | expression | expression "is" pattern //TODO maybe is-expressions should be primary
//! ```
//!
//! //TODO this implies there must be at least one enumerator, which the parser doesn't support right
//! //this second, and maybe should fail later anyway
//! ### For-expressions
//! ```text
//! for_expr := "for" (enumerator | "{" enumerators "}") for_expr_body
//! for_expr_body := "return" expression | "{" (statement delimiter)* "}"
//! enumerators := enumerator ("," enumerators)*
//! enumerator := identifier "<-" expression | identifier "=" expression //TODO add guards, etc.
//! ```
//!
mod test;
use std::rc::Rc;
use std::str::FromStr;
use crate::tokenizing::*;
use crate::tokenizing::Kw::*;
use crate::tokenizing::TokenKind::*;
use crate::ast::*;
/// Represents a parsing error: a message plus the token it is associated with.
#[derive(Debug)]
pub struct ParseError {
/// Human-readable description of what went wrong.
pub msg: String,
/// The token that was supplied when the error was constructed.
pub token: Token
}
impl ParseError {
    /// Builds a `ParseError` from `msg` and `token` and hands it back already wrapped in
    /// `Err`, so call sites can return the value directly from any function whose return
    /// type is `ParseResult<T>`.
    fn new_with_token<T, M>(msg: M, token: Token) -> ParseResult<T>
    where
        M: Into<String>,
    {
        let msg = msg.into();
        Err(ParseError { msg, token })
    }
}
/// Represents either a successfully parsed value of type `T` or a `ParseError`.
pub type ParseResult<T> = Result<T, ParseError>;
/// One entry of the parse trace rendered by `Parser::format_parse_trace`.
#[derive(Debug)]
pub struct ParseRecord {
/// Name of the grammar production that was entered.
production_name: String,
/// String rendering of the token that was next when the production was entered.
next_token: String,
/// Value of the parser's `parse_level` when the record was created; used as the
/// indentation width when the trace is formatted.
level: u32,
}
/// Main data structure for doing parsing.
pub struct Parser {
/// Cursor over the token stream being parsed.
token_handler: TokenHandler,
/// Trace of the productions entered so far (see `format_parse_trace`).
parse_record: Vec<ParseRecord>,
/// Current nesting depth, copied into each `ParseRecord` as it is created.
parse_level: u32,
/// Context-dependent switches that alter how some productions are parsed.
restrictions: ParserRestrictions,
/// Source of fresh ids for the AST nodes built by the parser.
id_store: ItemIdStore,
}
/// Context-dependent parsing switches.
struct ParserRestrictions {
/// When `true`, `identifier_expr` does not treat a `{` following an identifier as the
/// start of a struct literal.
no_struct_literal: bool
}
/// A cursor over a vector of tokens that yields synthetic `EOF` tokens once the
/// vector is exhausted.
struct TokenHandler {
/// All tokens available to the parser.
tokens: Vec<Token>,
/// Index of the next token to be returned by `next`.
idx: usize,
/// `(line_num, char_num)` of the last token (or `(0, 0)` when there are no tokens);
/// used as the position of synthetic `EOF` tokens.
end_of_file: (usize, usize),
}
impl TokenHandler {
    /// Creates a handler positioned at the first token. The position of the last token
    /// (or `(0, 0)` for an empty input) is remembered for synthetic `EOF` tokens.
    fn new(tokens: Vec<Token>) -> TokenHandler {
        let end_of_file = tokens.last().map_or((0, 0), |last| (last.line_num, last.char_num));
        TokenHandler { idx: 0, tokens, end_of_file }
    }

    /// Returns a copy of the token stored at `position`, or a synthetic `EOF` token
    /// located at `end_of_file` when `position` is past the end of the input.
    fn token_at(&self, position: usize) -> Token {
        match self.tokens.get(position) {
            Some(found) => found.clone(),
            None => Token {
                kind: TokenKind::EOF,
                line_num: self.end_of_file.0,
                char_num: self.end_of_file.1,
            },
        }
    }

    /// Kind of the next token, without consuming it.
    fn peek_kind(&mut self) -> TokenKind {
        self.peek().kind
    }

    /// Kind of the token `n` positions ahead, without consuming anything.
    fn peek_kind_n(&mut self, n: usize) -> TokenKind {
        self.peek_n(n).kind
    }

    /// The next token, without consuming it.
    fn peek(&mut self) -> Token {
        self.token_at(self.idx)
    }

    /// calling peek_n(0) is the same thing as peek()
    fn peek_n(&mut self, n: usize) -> Token {
        self.token_at(self.idx + n)
    }

    /// Consumes and returns the next token. The cursor advances even when the input is
    /// already exhausted, in which case a synthetic `EOF` token is returned.
    fn next(&mut self) -> Token {
        let current = self.idx;
        self.idx += 1;
        self.token_at(current)
    }
}
impl Parser {
    /// Create a new parser initialized with some tokens.
    pub fn new(initial_input: Vec<Token>) -> Parser {
        let token_handler = TokenHandler::new(initial_input);
        let restrictions = ParserRestrictions { no_struct_literal: false };
        Parser {
            token_handler,
            parse_record: vec![],
            parse_level: 0,
            restrictions,
            id_store: ItemIdStore::new(),
        }
    }

    /// Parse all loaded tokens up to this point.
    pub fn parse(&mut self) -> ParseResult<AST> {
        self.program()
    }

    /*
    pub fn parse_with_new_tokens(&mut self, new_tokens: Vec<Token>) -> ParseResult<AST> {
    }
    */

    /// Renders the recorded parse trace, one line per record, each line indented by one
    /// space per nesting level of the record.
    pub fn format_parse_trace(&self) -> String {
        self.parse_record
            .iter()
            .map(|record| {
                let indent = " ".repeat(record.level as usize);
                format!(
                    "{}Production `{}`, token: {}\n",
                    indent, record.production_name, record.next_token
                )
            })
            .collect()
    }
}
/// Turns a token pattern into its source text (via `stringify!`) so that it can be
/// embedded in an error message.
macro_rules! print_token_pattern {
($tokenpattern:pat) => { stringify!($tokenpattern) }
}
/// Consumes the next token if its kind matches the given pattern (optionally followed by
/// an `if` guard) and evaluates to that token.
///
/// On a mismatch the macro `return`s from the *enclosing function* with a `ParseError`
/// whose message contains the stringified pattern and the kind that was actually found;
/// the offending token is not consumed. Because the pattern text ends up in the message,
/// changing how a pattern is spelled at a call site changes the error text.
macro_rules! expect {
// No guard supplied: delegate to the guarded form with an always-true guard.
($self:expr, $token_kind:pat) => { expect!($self, $token_kind if true) };
($self:expr, $expected_kind:pat if $cond:expr) => {
{
let tok = $self.token_handler.peek();
match tok.get_kind() {
$expected_kind if $cond => $self.token_handler.next(),
actual_kind => {
let msg = format!("Expected {}, got {:?}", print_token_pattern!($expected_kind), actual_kind);
return ParseError::new_with_token(msg, tok);
}
}
}
}
}
/// Parses `$start (item ($delim item)*)? $end` and evaluates to a `Vec` of the items,
/// where each item is produced by calling `$self.$parse_fn()`.
///
/// Two modes exist:
/// * strict (the default): items must be separated by exactly one delimiter; the first
///   token after an item that is not a delimiter ends the list, and `$end` is expected.
/// * `nonstrict`: any number of delimiters may appear before, between and after items,
///   and a delimiter between two items is optional.
///
/// Errors from `$parse_fn` are propagated with `?`, and `expect!` may `return` from the
/// enclosing function, so the macro can only be used inside functions returning
/// `ParseResult`.
macro_rules! delimited {
// `nonstrict` marker: run the general form with strictness disabled.
($self:expr, $start:pat, $parse_fn:ident, $( $delim:pat )|+, $end:pat, nonstrict) => {
delimited!($self, $start, $parse_fn, $( $delim )|*, $end, false)
};
// No marker: run the general form in strict mode.
($self:expr, $start:pat, $parse_fn:ident, $( $delim:pat )|+, $end:pat) => {
delimited!($self, $start, $parse_fn, $( $delim )|*, $end, true)
};
($self:expr, $start:pat, $parse_fn:ident, $( $delim:pat )|+, $end:pat, $strictness:expr) => {
{
expect!($self, $start);
let mut acc = vec![];
loop {
let peek = $self.token_handler.peek();
// Stop at the closing token; also stop at EOF so the loop cannot spin forever
// (the `expect!` after the loop then reports the missing closing token).
match peek.get_kind() {
$end | EOF => break,
_ => (),
}
// Non-strict mode only: skip delimiters that are not preceded by an item.
if !$strictness {
match peek.get_kind() {
$( $delim )|* => { $self.token_handler.next(); continue },
_ => ()
}
}
acc.push($self.$parse_fn()?);
// After an item: a delimiter is consumed; otherwise strict mode ends the list
// while non-strict mode simply tries to parse another item.
match $self.token_handler.peek().get_kind() {
$( $delim )|* => { $self.token_handler.next(); continue },
_ if $strictness => break,
_ => continue,
};
}
expect!($self, $end);
acc
}
};
}
impl Parser {
/// `program := (statement delimiter)* EOF`
/// `delimiter := NEWLINE | ';'`
///
/// Collects statements until the end of the token stream. Newlines and semicolons found
/// between statements are consumed and discarded.
#[recursive_descent_method]
fn program(&mut self) -> ParseResult<AST> {
    let mut statements = Vec::new();
    loop {
        let upcoming = self.token_handler.peek().get_kind();
        match upcoming {
            EOF => break,
            Newline | Semicolon => {
                self.token_handler.next();
            }
            _ => {
                let parsed = self.statement()?;
                statements.push(parsed);
            }
        }
    }
    let id = self.id_store.fresh();
    Ok(AST { id, statements })
}
/// `statement := expression | declaration`
///
/// The leading keyword selects the kind of declaration; anything that does not start
/// with a declaration keyword is parsed as an expression.
#[recursive_descent_method]
fn statement(&mut self) -> ParseResult<Statement> {
    //TODO handle error recovery here
    let leading = self.token_handler.peek().get_kind();
    let kind = match leading {
        Keyword(Type) => StatementKind::Declaration(self.type_declaration()?),
        Keyword(Func) => StatementKind::Declaration(self.func_declaration()?),
        Keyword(Let) => StatementKind::Declaration(self.binding_declaration()?),
        Keyword(Interface) => StatementKind::Declaration(self.interface_declaration()?),
        Keyword(Impl) => StatementKind::Declaration(self.impl_declaration()?),
        _ => StatementKind::Expression(self.expression()?.into()),
    };
    let id = self.id_store.fresh();
    Ok(Statement { kind, id })
}
/// `type_declaration := "type" type_declaration_body`
///
/// Consumes the `type` keyword and delegates the rest to `type_declaration_body`.
#[recursive_descent_method]
fn type_declaration(&mut self) -> ParseResult<Declaration> {
expect!(self, Keyword(Type));
self.type_declaration_body()
}
/// `type_declaration_body := "alias" type_alias | "mut"? type_singleton_name "=" type_body`
///
/// An `alias` keyword hands over to `type_alias`; otherwise an optional `mut` is
/// followed by the type name, `=` and the variants.
#[recursive_descent_method]
fn type_declaration_body(&mut self) -> ParseResult<Declaration> {
    match self.token_handler.peek_kind() {
        Keyword(Alias) => self.type_alias(),
        first => {
            let mutable = match first {
                Keyword(Mut) => {
                    self.token_handler.next();
                    true
                }
                _ => false,
            };
            let name = self.type_singleton_name()?;
            expect!(self, Equals);
            let body = self.type_body()?;
            Ok(Declaration::TypeDecl { name, body, mutable })
        }
    }
}
/// Parses `"alias" IDENTIFIER "=" IDENTIFIER` into a `Declaration::TypeAlias` holding
/// the new name followed by the name it stands for.
#[recursive_descent_method]
fn type_alias(&mut self) -> ParseResult<Declaration> {
    expect!(self, Keyword(Alias));
    let new_name = self.identifier()?;
    expect!(self, Equals);
    let aliased = self.identifier()?;
    Ok(Declaration::TypeAlias(new_name, aliased))
}
/// `type_body := variant_specifier ("|" variant_specifier)*`
///
/// At least one variant is required; further variants are read for as long as a `|`
/// follows.
#[recursive_descent_method]
fn type_body(&mut self) -> ParseResult<TypeBody> {
    let mut variants = vec![self.variant_specifier()?];
    while let Pipe = self.token_handler.peek_kind() {
        self.token_handler.next();
        let another = self.variant_specifier()?;
        variants.push(another);
    }
    Ok(TypeBody(variants))
}
/// Parses one variant of a type body: a bare name (unit struct), a name followed by a
/// parenthesised, comma-separated list of type names (tuple struct), or a name followed
/// by a brace-enclosed, comma-separated list of typed identifiers (record).
#[recursive_descent_method]
fn variant_specifier(&mut self) -> ParseResult<Variant> {
    use self::Variant::*;
    let name = self.identifier()?;
    let variant = match self.token_handler.peek_kind() {
        LParen => TupleStruct(name, delimited!(self, LParen, type_name, Comma, RParen)),
        LCurlyBrace => Record {
            name,
            members: delimited!(self, LCurlyBrace, typed_identifier, Comma, RCurlyBrace),
        },
        _ => UnitStruct(name),
    };
    Ok(variant)
}
/// Parses `IDENTIFIER ":" type_name` and returns the name together with its type.
#[recursive_descent_method]
fn typed_identifier(&mut self) -> ParseResult<(Rc<String>, TypeIdentifier)> {
    let member_name = self.identifier()?;
    expect!(self, Colon);
    let member_type = self.type_name()?;
    Ok((member_name, member_type))
}
/// `func_declaration := func_signature func_body`
///
/// A signature followed by `{` yields a `FuncDecl` with the parsed body; a signature on
/// its own yields a `FuncSig`.
#[recursive_descent_method]
fn func_declaration(&mut self) -> ParseResult<Declaration> {
    let signature = self.func_signature()?;
    Ok(match self.token_handler.peek_kind() {
        LCurlyBrace => Declaration::FuncDecl(signature, self.nonempty_func_body()?),
        _ => Declaration::FuncSig(signature),
    })
}
/// Parses `"fn" func_name formal_param_list` followed by an optional type annotation.
///
/// The function name is either an operator token (recorded with `operator: true`) or a
/// plain identifier.
#[recursive_descent_method]
fn func_signature(&mut self) -> ParseResult<Signature> {
    expect!(self, Keyword(Func));
    let (name, operator) = match self.token_handler.peek_kind() {
        Operator(sigil) => {
            self.token_handler.next();
            (sigil, true)
        }
        _ => (self.identifier()?, false),
    };
    let params = self.formal_param_list()?;
    let type_anno = if let Colon = self.token_handler.peek_kind() {
        Some(self.type_anno()?)
    } else {
        None
    };
    Ok(Signature { name, operator, params, type_anno })
}
/// `nonempty_func_body := "{" (statement delimiter)* "}"`
///
/// Statements are separated by newlines or semicolons, parsed non-strictly.
#[recursive_descent_method]
fn nonempty_func_body(&mut self) -> ParseResult<Vec<Statement>> {
    Ok(delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict))
}
/// Parses a parenthesised, comma-separated list of formal parameters.
#[recursive_descent_method]
fn formal_param_list(&mut self) -> ParseResult<Vec<FormalParam>> {
    let params = delimited!(self, LParen, formal_param, Comma, RParen);
    Ok(params)
}
/// Parses one formal parameter: an identifier, an optional `:` type annotation and an
/// optional `=` default-value expression.
#[recursive_descent_method]
fn formal_param(&mut self) -> ParseResult<FormalParam> {
    let name = self.identifier()?;
    let anno = if let Colon = self.token_handler.peek_kind() {
        Some(self.type_anno()?)
    } else {
        None
    };
    let default = if let Equals = self.token_handler.peek_kind() {
        self.token_handler.next();
        Some(self.expression()?.into())
    } else {
        None
    };
    Ok(FormalParam { name, anno, default })
}
/// Parses `"let" "mut"? IDENTIFIER` followed by an optional type annotation, `=` and the
/// bound expression. The binding is constant unless `mut` is present.
#[recursive_descent_method]
fn binding_declaration(&mut self) -> ParseResult<Declaration> {
    expect!(self, Keyword(Kw::Let));
    let constant = if let Keyword(Kw::Mut) = self.token_handler.peek_kind() {
        self.token_handler.next();
        false
    } else {
        true
    };
    let name = self.identifier()?;
    let type_anno = match self.token_handler.peek_kind() {
        Colon => Some(self.type_anno()?),
        _ => None,
    };
    expect!(self, Equals);
    let expr = self.expression()?.into();
    Ok(Declaration::Binding { name, constant, type_anno, expr })
}
/// Parses `"interface" IDENTIFIER signature_block` into a `Declaration::Interface`.
#[recursive_descent_method]
fn interface_declaration(&mut self) -> ParseResult<Declaration> {
    expect!(self, Keyword(Interface));
    Ok(Declaration::Interface {
        name: self.identifier()?,
        signatures: self.signature_block()?,
    })
}
/// `signature_block := "{" (func_signature)* "}"`
///
/// Signatures are separated by newlines or semicolons, parsed non-strictly.
#[recursive_descent_method]
fn signature_block(&mut self) -> ParseResult<Vec<Signature>> {
    let signatures =
        delimited!(self, LCurlyBrace, func_signature, Newline | Semicolon, RCurlyBrace, nonstrict);
    Ok(signatures)
}
/// `impl_declaration := "impl" type_singleton_name decl_block | "impl" type_singleton_name "for" type_name decl_block`
///
/// With a `for` clause the first name is the interface and the type after `for` is the
/// implementing type; without it the first name is the implementing type itself and no
/// interface is recorded.
#[recursive_descent_method]
fn impl_declaration(&mut self) -> ParseResult<Declaration> {
    expect!(self, Keyword(Impl));
    let leading_name = self.type_singleton_name()?;
    let implementing_type = match self.token_handler.peek_kind() {
        Keyword(For) => {
            self.token_handler.next();
            Some(self.type_name()?)
        }
        _ => None,
    };
    let block = self.decl_block()?;
    let declaration = match implementing_type {
        Some(type_name) => Declaration::Impl {
            type_name,
            interface_name: Some(leading_name),
            block,
        },
        None => Declaration::Impl {
            type_name: TypeIdentifier::Singleton(leading_name),
            interface_name: None,
            block,
        },
    };
    Ok(declaration)
}
/// `decl_block := "{" (func_declaration)* "}"`
///
/// Declarations are separated by newlines or semicolons, parsed non-strictly.
#[recursive_descent_method]
fn decl_block(&mut self) -> ParseResult<Vec<Declaration>> {
    let declarations =
        delimited!(self, LCurlyBrace, func_declaration, Newline | Semicolon, RCurlyBrace, nonstrict);
    Ok(declarations)
}
/// Parses a precedence expression followed by an optional `:` type annotation and
/// attaches the annotation to the expression.
///
/// It is reported as an error if the expression returned by `precedence_expr` already
/// carries a type annotation.
#[recursive_descent_method]
fn expression(&mut self) -> ParseResult<Expression> {
    let mut parsed = self.precedence_expr(BinOp::min_precedence())?;
    let annotation = if let Colon = self.token_handler.peek_kind() {
        Some(self.type_anno()?)
    } else {
        None
    };
    if parsed.type_anno.is_some() {
        return ParseError::new_with_token("Bad parse state encountered", self.token_handler.peek());
    }
    parsed.type_anno = annotation;
    Ok(parsed)
}
/// `type_anno := ":" type_name`
///
/// Consumes the colon and returns the type name that follows it.
#[recursive_descent_method]
fn type_anno(&mut self) -> ParseResult<TypeIdentifier> {
expect!(self, Colon);
self.type_name()
}
/// `type_name := type_singleton_name | "(" type_names ")"`
///
/// A leading `(` starts a comma-separated tuple of type names; anything else is parsed
/// as a single (possibly parameterised) type name.
#[recursive_descent_method]
fn type_name(&mut self) -> ParseResult<TypeIdentifier> {
    use self::TypeIdentifier::*;
    let parsed = if let LParen = self.token_handler.peek_kind() {
        Tuple(delimited!(self, LParen, type_name, Comma, RParen))
    } else {
        Singleton(self.type_singleton_name()?)
    };
    Ok(parsed)
}
/// Parses an identifier optionally followed by a `<`...`>` list of comma-separated type
/// parameters. Without a parameter list `params` is empty.
#[recursive_descent_method]
fn type_singleton_name(&mut self) -> ParseResult<TypeSingletonName> {
    let name = self.identifier()?;
    let params = if let LAngleBracket = self.token_handler.peek_kind() {
        delimited!(self, LAngleBracket, type_name, Comma, RAngleBracket)
    } else {
        vec![]
    };
    Ok(TypeSingletonName { name, params })
}
// this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
/// Parses a chain of binary operations whose operators all bind more tightly than
/// `precedence`. An operator whose precedence is not strictly greater than `precedence`
/// ends the chain and is left in the token stream for the caller.
///
/// The trace bookkeeping is done by hand here; note that the nesting level is only
/// decremented on the success path.
fn precedence_expr(&mut self, precedence: i32) -> ParseResult<Expression> {
    let next_token = format!("{}", self.token_handler.peek().to_string_with_metadata());
    self.parse_record.push(ParseRecord {
        production_name: "precedence_expr".to_string(),
        next_token,
        level: self.parse_level,
    });
    self.parse_level += 1;

    let mut lhs = self.prefix_expr()?;
    loop {
        let upcoming = self.token_handler.peek_kind();
        let new_precedence = match BinOp::get_precedence_from_token(&upcoming) {
            Some(candidate) if candidate > precedence => candidate,
            _ => break,
        };
        let operator_token = self.token_handler.next();
        let operation = match BinOp::from_sigil_token(&operator_token.kind) {
            Some(sigil) => sigil,
            None => unreachable!(),
        };
        let rhs = self.precedence_expr(new_precedence)?;
        let combined = ExpressionKind::BinExp(operation, bx!(lhs.into()), bx!(rhs.into()));
        lhs = Expression::new(self.id_store.fresh(), combined);
    }

    self.parse_level -= 1;
    Ok(lhs)
}
/// Parses an optional prefix operator applied to a primary expression; without a prefix
/// operator the work is delegated to `call_expr`.
///
/// NOTE(review): the module grammar says `prefix_expr := prefix_op call_expr`, but the
/// operand of a prefix operator is parsed with `primary` here - confirm which is intended.
#[recursive_descent_method]
fn prefix_expr(&mut self) -> ParseResult<Expression> {
    match self.token_handler.peek_kind() {
        Operator(ref op) if PrefixOp::is_prefix(&*op) => {
            // The token just inspected is the prefix operator itself; consume it.
            self.token_handler.next();
            let operand = self.primary()?;
            let prefix_op = PrefixOp::from_str(op.as_str()).unwrap();
            let id = self.id_store.fresh();
            Ok(Expression::new(id, ExpressionKind::PrefixExp(prefix_op, bx!(operand.into()))))
        }
        _ => self.call_expr(),
    }
}
/// Parses an index expression followed by any number of parenthesised argument lists,
/// wrapping the expression in one `Call` node per argument list.
#[recursive_descent_method]
fn call_expr(&mut self) -> ParseResult<Expression> {
    let mut callee = self.index_expr()?;
    loop {
        match self.token_handler.peek_kind() {
            LParen => {
                let arguments = delimited!(self, LParen, invocation_argument, Comma, RParen)
                    .into_iter()
                    .collect();
                //TODO no type anno is incorrect
                callee = Expression::new(
                    self.id_store.fresh(),
                    ExpressionKind::Call { f: bx!(callee.into()), arguments },
                );
            }
            _ => break,
        }
    }
    Ok(callee)
}
/// `invocation_argument := expression | IDENTIFIER "=" expression | "_"`
///
/// Two tokens of lookahead distinguish a keyword argument (`name = expr`) from a
/// positional expression that merely starts with an identifier.
#[recursive_descent_method]
fn invocation_argument(&mut self) -> ParseResult<InvocationArgument> {
    let lookahead = (self.token_handler.peek_kind(), self.token_handler.peek_kind_n(1));
    let argument = match lookahead {
        (Underscore, _) => {
            self.token_handler.next();
            InvocationArgument::Ignored
        }
        (Identifier(name), Equals) => {
            // Skip the name and the `=`; what follows is the argument's value.
            self.token_handler.next();
            self.token_handler.next();
            let expr = self.expression()?.into();
            InvocationArgument::Keyword { name, expr }
        }
        _ => {
            let expr = self.expression()?;
            InvocationArgument::Positional(expr.into())
        }
    };
    Ok(argument)
}
/// Parses a primary expression optionally followed by one bracketed, comma-separated
/// list of index expressions.
///
/// When indexers are present the primary's `kind` is re-wrapped in a new expression
/// (with a fresh id) that becomes the indexee.
#[recursive_descent_method]
fn index_expr(&mut self) -> ParseResult<Expression> {
    let primary = self.primary()?;
    let parsed = match self.token_handler.peek_kind() {
        LSquareBracket => {
            let indexers = delimited!(self, LSquareBracket, expression, Comma, RSquareBracket)
                .into_iter()
                .map(|ex| ex.into())
                .collect();
            // Ids are drawn in the same order as before: outer node first, then indexee.
            let index_id = self.id_store.fresh();
            let indexee_id = self.id_store.fresh();
            Expression::new(index_id, ExpressionKind::Index {
                indexee: bx!(Expression::new(indexee_id, primary.kind).into()),
                indexers,
            })
        }
        _ => primary,
    };
    Ok(parsed)
}
/// Dispatches on the next token to the parser for the matching kind of primary
/// expression. Any token that does not start one of the listed forms is handed to
/// `literal`.
#[recursive_descent_method]
fn primary(&mut self) -> ParseResult<Expression> {
match self.token_handler.peek_kind() {
LCurlyBrace => self.curly_brace_expr(),
Backslash => self.lambda_expr(),
LParen => self.paren_expr(),
LSquareBracket => self.list_expr(),
Keyword(Kw::If) => self.if_expr(),
Keyword(Kw::For) => self.for_expr(),
Keyword(Kw::While) => self.while_expr(),
Identifier(_) => self.identifier_expr(),
_ => self.literal(),
}
}
/// Parses a bracketed, comma-separated list of expressions into a `ListLiteral`.
#[recursive_descent_method]
fn list_expr(&mut self) -> ParseResult<Expression> {
    let elements = delimited!(self, LSquareBracket, expression, Comma, RSquareBracket)
        .into_iter()
        .map(|ex| ex.into())
        .collect();
    let id = self.id_store.fresh();
    Ok(Expression::new(id, ExpressionKind::ListLiteral(elements)))
}
/// Placeholder for expressions that start with `{`: always fails with a
/// "Not implemented" error pointing at the next token, which is not consumed.
#[recursive_descent_method]
fn curly_brace_expr(&mut self) -> ParseResult<Expression> {
ParseError::new_with_token("Not implemented", self.token_handler.peek())
}
/// Parses a backslash, the lambda's parameter list, an optional type annotation and a
/// brace-enclosed body into a `Lambda` expression.
#[recursive_descent_method]
fn lambda_expr(&mut self) -> ParseResult<Expression> {
    expect!(self, Backslash);
    let params = self.lambda_param_list()?;
    let type_anno = if let Colon = self.token_handler.peek_kind() {
        Some(self.type_anno()?)
    } else {
        None
    };
    let body = self.nonempty_func_body()?;
    //TODO need to handle types somehow
    let id = self.id_store.fresh();
    Ok(Expression::new(id, ExpressionKind::Lambda { params, type_anno, body }))
}
/// `lambda_param_list := formal_param_list | formal_param`
///
/// A leading `(` selects the parenthesised list; otherwise exactly one bare parameter
/// is parsed.
#[recursive_descent_method]
fn lambda_param_list(&mut self) -> ParseResult<Vec<FormalParam>> {
    match self.token_handler.peek_kind() {
        LParen => self.formal_param_list(),
        _ => Ok(vec![self.formal_param()?]),
    }
}
/// Parses a parenthesised, comma-separated list of expressions.
///
/// Zero elements give an empty tuple literal, exactly one element gives that expression
/// unchanged, and two or more give a tuple literal. Struct literals are allowed inside
/// the parentheses regardless of the surrounding restriction, which is put back once
/// the contents have been parsed.
#[recursive_descent_method]
fn paren_expr(&mut self) -> ParseResult<Expression> {
    use self::ExpressionKind::*;
    let saved_restriction = self.restrictions.no_struct_literal;
    self.restrictions.no_struct_literal = false;
    let mut inner = delimited!(self, LParen, expression, Comma, RParen);
    let output = if inner.is_empty() {
        Ok(Expression::new(self.id_store.fresh(), TupleLiteral(vec![])))
    } else if inner.len() == 1 {
        Ok(inner.pop().unwrap())
    } else {
        let members: Vec<Expression> = inner.into_iter().map(|ex| ex.into()).collect();
        Ok(Expression::new(self.id_store.fresh(), TupleLiteral(members)))
    };
    self.restrictions.no_struct_literal = saved_restriction;
    output
}
/// `identifier_expr := qualified_identifier | named_struct`
///
/// A `{` after the name starts a struct literal unless struct literals are currently
/// restricted; otherwise the name on its own is the expression.
#[recursive_descent_method]
fn identifier_expr(&mut self) -> ParseResult<Expression> {
    use self::ExpressionKind::*;
    let name = self.qualified_identifier()?;
    let struct_literal_follows = match self.token_handler.peek_kind() {
        LCurlyBrace => !self.restrictions.no_struct_literal,
        _ => false,
    };
    let kind = if struct_literal_follows {
        let fields = self.record_block()?;
        NamedStruct { name, fields }
    } else {
        Value(name)
    };
    Ok(Expression::new(self.id_store.fresh(), kind))
}
/// `qualified_identifier := IDENTIFIER ("::" IDENTIFIER)*`
///
/// The `::` separator is recognised as two consecutive `Colon` tokens.
#[recursive_descent_method]
fn qualified_identifier(&mut self) -> ParseResult<QualifiedName> {
    let mut components = vec![self.identifier()?];
    while let (Colon, Colon) = (self.token_handler.peek_kind(), self.token_handler.peek_kind_n(1)) {
        self.token_handler.next();
        self.token_handler.next();
        let segment = self.identifier()?;
        components.push(segment);
    }
    let id = self.id_store.fresh();
    Ok(QualifiedName { id, components })
}
/// Parses a brace-enclosed, comma-separated list of `name: expression` entries.
#[recursive_descent_method]
fn record_block(&mut self) -> ParseResult<Vec<(Rc<String>, Expression)>> {
    let entries = delimited!(self, LCurlyBrace, record_entry, Comma, RCurlyBrace);
    Ok(entries
        .into_iter()
        .map(|(field, value)| (field, value.into()))
        .collect())
}
/// `record_entry := IDENTIFIER ":" expression`
#[recursive_descent_method]
fn record_entry(&mut self) -> ParseResult<(Rc<String>, Expression)> {
    let field = self.identifier()?;
    expect!(self, Colon);
    let initializer = self.expression()?;
    Ok((field, initializer))
}
/// Parses `"if" discriminator` followed by one of three bodies, chosen by the next
/// token: `then` (plain conditional), `is` (single pattern match) or anything else
/// (a guard block).
///
/// Struct literals are disabled while the discriminator is parsed. Afterwards the
/// restriction that was in force *before* this `if` is reinstated rather than being
/// cleared unconditionally, so an `if` nested inside another restricted context (for
/// instance a `while` condition) does not re-enable struct literals for the rest of
/// that context. The restriction is reinstated even when the discriminator fails to
/// parse.
#[recursive_descent_method]
fn if_expr(&mut self) -> ParseResult<Expression> {
    expect!(self, Keyword(Kw::If));
    let discriminator = Box::new({
        let old_struct_value = self.restrictions.no_struct_literal;
        self.restrictions.no_struct_literal = true;
        let x = self.discriminator();
        // Put back whatever was set before, not a hard-coded `false`.
        self.restrictions.no_struct_literal = old_struct_value;
        x?
    });
    let body = Box::new(match self.token_handler.peek_kind() {
        Keyword(Kw::Then) => self.conditional()?,
        Keyword(Kw::Is) => self.simple_pattern_match()?,
        _ => self.guard_block()?,
    });
    Ok(Expression::new(self.id_store.fresh(), ExpressionKind::IfExpression { discriminator, body }))
}
/// Parses the discriminator of an `if` expression: a prefix expression optionally
/// followed by a binary operator.
///
/// When an operator follows, it becomes part of the returned `Discriminator::BinOp` and
/// is consumed from the token stream. Previously the operator was only peeked, which
/// left it as the next token; the caller then expects `then`, `is` or the `{` of a guard
/// block and therefore could never accept an operator-style discriminator.
#[recursive_descent_method]
fn discriminator(&mut self) -> ParseResult<Discriminator> {
    let lhs = self.prefix_expr()?;
    let next = self.token_handler.peek_kind();
    Ok(match BinOp::from_sigil_token(&next) {
        Some(op) => {
            // The operator now lives in the AST node, so remove it from the stream.
            self.token_handler.next();
            Discriminator::BinOp(lhs.into(), op)
        }
        None => Discriminator::Simple(lhs.into()),
    })
}
/// Parses `"then" expr_or_block` followed by an optional `else` clause into a
/// `SimpleConditional` body.
#[recursive_descent_method]
fn conditional(&mut self) -> ParseResult<IfExpressionBody> {
    expect!(self, Keyword(Kw::Then));
    let on_true = self.expr_or_block()?;
    let on_false = self.else_clause()?;
    Ok(IfExpressionBody::SimpleConditional(on_true, on_false))
}
/// Parses `"is" pattern "then" expr_or_block` followed by an optional `else` clause
/// into a `SimplePatternMatch` body.
#[recursive_descent_method]
fn simple_pattern_match(&mut self) -> ParseResult<IfExpressionBody> {
    expect!(self, Keyword(Kw::Is));
    let matched_pattern = self.pattern()?;
    expect!(self, Keyword(Kw::Then));
    let on_match = self.expr_or_block()?;
    let otherwise = self.else_clause()?;
    Ok(IfExpressionBody::SimplePatternMatch(matched_pattern, on_match, otherwise))
}
/// `else_clause := ε | "else" expr_or_block`
///
/// Returns `None` when the next token is not `else`.
#[recursive_descent_method]
fn else_clause(&mut self) -> ParseResult<Option<Block>> {
    Ok(match self.token_handler.peek_kind() {
        Keyword(Kw::Else) => {
            self.token_handler.next();
            Some(self.expr_or_block()?)
        }
        _ => None,
    })
}
/// Parses a brace-enclosed list of guard arms separated by commas.
///
/// Newlines and semicolons are skipped before an arm and after it; a comma is required
/// after every arm except one that is directly followed by the closing brace.
#[recursive_descent_method]
fn guard_block(&mut self) -> ParseResult<IfExpressionBody> {
//TODO - delimited! isn't sophisticated enough to do this
//let guards = delimited!(self, LCurlyBrace, guard_arm, Comma, RCurlyBrace);
expect!(self, LCurlyBrace);
let mut guards = vec![];
loop {
match self.token_handler.peek_kind() {
// EOF also ends the loop; the final `expect!` then reports the missing brace.
RCurlyBrace | EOF => break,
Semicolon | Newline => { self.token_handler.next(); continue},
_ => {
let guard_arm = self.guard_arm()?;
guards.push(guard_arm);
// Skip line breaks between the arm and its trailing comma or the closing brace.
loop {
match self.token_handler.peek_kind() {
Semicolon | Newline => { self.token_handler.next(); continue; },
_ => break,
}
}
// The last arm does not need a trailing comma.
if let RCurlyBrace = self.token_handler.peek_kind() {
break;
}
expect!(self, Comma);
}
}
}
expect!(self, RCurlyBrace);
Ok(IfExpressionBody::GuardList(guards))
}
/// `guard_arm := guard "->" expr_or_block`
#[recursive_descent_method]
fn guard_arm(&mut self) -> ParseResult<GuardArm> {
    let guard = self.guard()?;
    expect!(self, Operator(ref c) if **c == "->");
    Ok(GuardArm { guard, body: self.expr_or_block()? })
}
/// Parses the condition of a guard arm, which takes one of three forms:
/// `is` followed by a pattern, a binary operator followed by its right-hand operand
/// (a half expression with an operator), or a bare prefix expression (a half expression
/// without an operator).
#[recursive_descent_method]
fn guard(&mut self) -> ParseResult<Guard> {
    let upcoming = self.token_handler.peek_kind();
    let parsed = if let Keyword(Kw::Is) = upcoming {
        self.token_handler.next();
        Guard::Pat(self.pattern()?)
    } else if let Some(op) = BinOp::from_sigil_token(&upcoming) {
        // Consume the operator that was just recognised from the peeked token.
        self.token_handler.next();
        let precedence = op.get_precedence();
        let Expression { kind, .. } = self.precedence_expr(precedence)?;
        Guard::HalfExpr(HalfExpr { op: Some(op), expr: kind })
    } else {
        //TODO - I think there's a better way to do this involving the precedence of ->
        let Expression { kind, .. } = self.prefix_expr()?;
        Guard::HalfExpr(HalfExpr { op: None, expr: kind })
    };
    Ok(parsed)
}
/// `pattern := "(" (pattern, ",")* ")" | simple_pattern`
///
/// A leading `(` starts a tuple pattern; everything else is a simple pattern.
#[recursive_descent_method]
fn pattern(&mut self) -> ParseResult<Pattern> {
    match self.token_handler.peek_kind() {
        LParen => {
            let members = delimited!(self, LParen, pattern, Comma, RParen);
            Ok(Pattern::TuplePattern(members))
        }
        _ => self.simple_pattern(),
    }
}
/// Parses a non-tuple pattern.
///
/// A pattern starting with an identifier is a qualified name, optionally followed by a
/// brace-enclosed list of record entries (record pattern) or a parenthesised list of
/// sub-patterns (tuple-struct pattern). Anything else is handed to `pattern_literal`.
#[recursive_descent_method]
fn simple_pattern(&mut self) -> ParseResult<Pattern> {
    let parsed = if let Identifier(_) = self.token_handler.peek_kind() {
        let qualified_name = self.qualified_identifier()?;
        match self.token_handler.peek_kind() {
            LCurlyBrace => Pattern::Record(
                qualified_name,
                delimited!(self, LCurlyBrace, record_pattern_entry, Comma, RCurlyBrace),
            ),
            LParen => Pattern::TupleStruct(
                qualified_name,
                delimited!(self, LParen, pattern, Comma, RParen),
            ),
            _ => Pattern::VarOrName(qualified_name),
        }
    } else {
        self.pattern_literal()?
    };
    Ok(parsed)
}
/// Parses a literal pattern: `true`, `false`, a string literal, an optionally negated
/// number literal, or `_` (which yields `Pattern::Ignored`). Any other token is reported
/// as an invalid pattern without being consumed.
#[recursive_descent_method]
fn pattern_literal(&mut self) -> ParseResult<Pattern> {
    let tok = self.token_handler.peek();
    let parsed = match tok.get_kind() {
        Keyword(Kw::True) => {
            self.token_handler.next();
            Pattern::Literal(PatternLiteral::BoolPattern(true))
        }
        Keyword(Kw::False) => {
            self.token_handler.next();
            Pattern::Literal(PatternLiteral::BoolPattern(false))
        }
        StrLiteral(text) => {
            self.token_handler.next();
            Pattern::Literal(PatternLiteral::StringPattern(text))
        }
        DigitGroup(_) | HexLiteral(_) | BinNumberSigil | Period => self.signed_number_literal()?,
        Operator(ref op) if **op == "-" => self.signed_number_literal()?,
        Underscore => {
            self.token_handler.next();
            Pattern::Ignored
        }
        other => {
            let msg = format!("{:?} is not a valid Pattern", other);
            return ParseError::new_with_token(msg, tok);
        }
    };
    Ok(parsed)
}
/*
#[recursive_descent_method]
fn simple_pattern(&mut self) -> ParseResult<Pattern> {
Ok({
let tok = self.token_handler.peek();
match tok.get_kind() {
Identifier(_) => {
let id = self.identifier()?;
match self.token_handler.peek_kind() {
LCurlyBrace => {
let members = delimited!(self, LCurlyBrace, record_pattern_entry, Comma, RCurlyBrace);
Pattern::Record(id, members)
},
LParen => {
let members = delimited!(self, LParen, pattern, Comma, RParen);
Pattern::TupleStruct(id, members)
},
_ => Pattern::Literal(PatternLiteral::VarPattern(id))
}
},
Keyword(Kw::True) => {
self.token_handler.next();
Pattern::Literal(PatternLiteral::BoolPattern(true))
},
Keyword(Kw::False) => {
self.token_handler.next();
Pattern::Literal(PatternLiteral::BoolPattern(false))
},
StrLiteral(s) => {
self.token_handler.next();
Pattern::Literal(PatternLiteral::StringPattern(s))
},
DigitGroup(_) | HexLiteral(_) | BinNumberSigil | Period => self.signed_number_literal()?,
Operator(ref op) if **op == "-" => self.signed_number_literal()?,
Underscore => {
self.token_handler.next();
Pattern::Ignored
},
other => return ParseError::new_with_token(format!("{:?} is not a valid Pattern", other), tok)
}
})
}
*/
/// `signed_number_literal := "-"? number_literal`
///
/// Returns a number pattern that records whether a leading minus sign was present.
#[recursive_descent_method]
fn signed_number_literal(&mut self) -> ParseResult<Pattern> {
    let neg = match self.token_handler.peek_kind() {
        Operator(ref op) if **op == "-" => true,
        _ => false,
    };
    if neg {
        // Step over the minus sign.
        self.token_handler.next();
    }
    let Expression { kind, .. } = self.number_literal()?;
    Ok(Pattern::Literal(PatternLiteral::NumPattern { neg, num: kind }))
}
/// `record_pattern_entry := IDENTIFIER | IDENTIFIER ":" Pattern`
///
/// In the shorthand form (no colon) the entry's pattern is a string pattern built from
/// the field name itself.
#[recursive_descent_method]
fn record_pattern_entry(&mut self) -> ParseResult<(Rc<String>, Pattern)> {
    let name = self.identifier()?;
    let entry_pattern = if let Colon = self.token_handler.peek_kind() {
        expect!(self, Colon);
        self.pattern()?
    } else {
        Pattern::Literal(PatternLiteral::StringPattern(name.clone()))
    };
    Ok((name, entry_pattern))
}
/// `block := "{" (statement delimiter)* "}"`
///
/// Statements are separated by newlines or semicolons, parsed non-strictly.
#[recursive_descent_method]
fn block(&mut self) -> ParseResult<Block> {
    Ok(delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict))
}
/// `expr_or_block := "{" (statement delimiter)* "}" | expr`
///
/// A single expression is wrapped into a one-statement block.
#[recursive_descent_method]
fn expr_or_block(&mut self) -> ParseResult<Block> {
    if let LCurlyBrace = self.token_handler.peek_kind() {
        self.block()
    } else {
        let expr = self.expression()?;
        let only_statement = Statement {
            id: self.id_store.fresh(),
            kind: StatementKind::Expression(expr.into()),
        };
        Ok(vec![only_statement])
    }
}
/// `while_expr := "while" while_cond "{" (statement delimiter)* "}"`
///
/// Struct literals are disabled while the condition is parsed so that the `{` opening
/// the loop body is not taken for a record block. Afterwards the restriction that was
/// in force *before* this `while` is reinstated rather than being cleared
/// unconditionally, so a `while` nested inside another restricted context does not
/// re-enable struct literals there. The restriction is reinstated even when the
/// condition fails to parse.
#[recursive_descent_method]
fn while_expr(&mut self) -> ParseResult<Expression> {
    use self::ExpressionKind::*;
    expect!(self, Keyword(Kw::While));
    let condition = {
        let old_struct_value = self.restrictions.no_struct_literal;
        self.restrictions.no_struct_literal = true;
        let x = self.while_cond();
        // Put back whatever was set before, not a hard-coded `false`.
        self.restrictions.no_struct_literal = old_struct_value;
        x?.map(|expr| bx!(expr.into()))
    };
    let body = self.block()?;
    Ok(Expression::new(self.id_store.fresh(), WhileExpression { condition, body }))
}
/// Parses the optional condition of a `while` loop. A `{` as the next token means the
/// loop has no condition and `None` is returned; otherwise an expression is parsed.
#[recursive_descent_method]
fn while_cond(&mut self) -> ParseResult<Option<Expression>> {
    let condition = if let LCurlyBrace = self.token_handler.peek_kind() {
        None
    } else {
        Some(self.expression()?)
    };
    Ok(condition)
}
/// Parses a `for` expression: the `for` keyword, one or more enumerators, and
/// a body.
///
/// Enumerators are either a braced list separated by commas or newlines, or a
/// single bare enumerator. The bare form is parsed with
/// `restrictions.no_struct_literal` set.
#[recursive_descent_method]
fn for_expr(&mut self) -> ParseResult<Expression> {
    expect!(self, Keyword(Kw::For));
    let enumerators = match self.token_handler.peek_kind() {
        LCurlyBrace => delimited!(self, LCurlyBrace, enumerator, Comma | Newline, RCurlyBrace),
        _ => {
            // Clear the flag before propagating any error from the enumerator.
            // NOTE(review): the flag is reset to `false`, not restored to its
            // previous value; confirm that is intended.
            self.restrictions.no_struct_literal = true;
            let parsed = self.enumerator();
            self.restrictions.no_struct_literal = false;
            vec![parsed?]
        }
    };
    let body = self.for_expr_body()?;
    let id = self.id_store.fresh();
    Ok(Expression::new(id, ExpressionKind::ForExpression { enumerators, body: Box::new(body) }))
}
/// Parses a single `for` enumerator of the form `IDENTIFIER <- expression`.
///
/// Fails if the identifier is not followed by the `<-` operator.
#[recursive_descent_method]
fn enumerator(&mut self) -> ParseResult<Enumerator> {
    let binder = self.identifier()?;
    expect!(self, Operator(ref c) if **c == "<-");
    let source = self.expression()?;
    Ok(Enumerator { id: binder, generator: source.into() })
}
/// Parses the body of a `for` expression.
///
/// Two forms are accepted:
/// - a braced statement block, yielding `ForBody::StatementBlock`;
/// - the `return` keyword followed by an expression, yielding
///   `ForBody::MonadicReturn`.
///
/// Any other leading token is a parse error reported against that token.
#[recursive_descent_method]
fn for_expr_body(&mut self) -> ParseResult<ForBody> {
    use self::ForBody::*;
    let tok = self.token_handler.peek();
    let body = match tok.get_kind() {
        LCurlyBrace => {
            StatementBlock(delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict))
        },
        Keyword(Kw::Return) => {
            // Skip the `return` keyword itself.
            self.token_handler.next();
            let returned = self.expression()?;
            MonadicReturn(returned.into())
        },
        _ => return ParseError::new_with_token("for expressions must end in a block or 'return'", tok),
    };
    Ok(body)
}
/// Consumes the next token and returns its name if it is an identifier.
///
/// The token is consumed even when it is not an identifier; in that case a
/// parse error naming the offending token kind is returned.
#[recursive_descent_method]
fn identifier(&mut self) -> ParseResult<Rc<String>> {
    let token = self.token_handler.next();
    let kind = token.get_kind();
    match kind {
        Identifier(name) => Ok(name),
        unexpected => {
            let msg = format!("Expected an identifier, got {:?}", unexpected);
            ParseError::new_with_token(msg, token)
        },
    }
}
/// Parses a literal expression: a number, `true`, `false`, or a string.
///
/// Number-like leading tokens (digit group, hex literal, binary sigil, or a
/// period) are delegated to `number_literal`. Boolean and string literals
/// consume exactly one token. Anything else is a parse error reported against
/// the peeked token, which is left unconsumed.
#[recursive_descent_method]
fn literal(&mut self) -> ParseResult<Expression> {
    use self::ExpressionKind::*;
    let tok = self.token_handler.peek();
    match tok.get_kind() {
        DigitGroup(_) | HexLiteral(_) | BinNumberSigil | Period => self.number_literal(),
        Keyword(Kw::True) => {
            self.token_handler.next();
            Ok(Expression::new(self.id_store.fresh(), BoolLiteral(true)))
        },
        Keyword(Kw::False) => {
            self.token_handler.next();
            Ok(Expression::new(self.id_store.fresh(), BoolLiteral(false)))
        },
        StrLiteral(text) => {
            self.token_handler.next();
            Ok(Expression::new(self.id_store.fresh(), StringLiteral(text.clone())))
        },
        unexpected => {
            let msg = format!("Expected a literal expression, got {:?}", unexpected);
            ParseError::new_with_token(msg, tok)
        },
    }
}
/// Parses a numeric literal, dispatching on the next token.
///
/// Hex literals and the binary sigil go to `int_literal`; everything else goes
/// to `float_literal`, which also handles plain decimal integers.
#[recursive_descent_method]
fn number_literal(&mut self) -> ParseResult<Expression> {
    let has_radix_prefix = match self.token_handler.peek_kind() {
        HexLiteral(_) | BinNumberSigil => true,
        _ => false,
    };
    if has_radix_prefix {
        self.int_literal()
    } else {
        self.float_literal()
    }
}
/// Parses a binary or hexadecimal integer literal into a `NatLiteral`.
///
/// The next token is consumed unconditionally. For a binary sigil, the
/// following digit tokens are gathered with `digits` and converted by
/// `parse_binary`. For a hex literal, the hexadecimal digit characters of the
/// token text are kept (all other characters are dropped) and converted by
/// `parse_hex`. Any other token is a parse error.
#[recursive_descent_method]
fn int_literal(&mut self) -> ParseResult<Expression> {
    use self::ExpressionKind::*;
    let tok = self.token_handler.next();
    let value = match tok.get_kind() {
        BinNumberSigil => {
            let bits = self.digits()?;
            parse_binary(bits, tok)?
        },
        HexLiteral(text) => {
            let hex: String = text.chars().filter(|c| c.is_ascii_hexdigit()).collect();
            parse_hex(hex, tok)?
        },
        _ => return ParseError::new_with_token("Expected '0x' or '0b'", tok),
    };
    Ok(Expression::new(self.id_store.fresh(), NatLiteral(value)))
}
/// Parses a decimal numeric literal, producing either a float or a natural.
///
/// Leading digits are gathered with `digits`. If a period follows, it is
/// consumed along with any digits after it, and the combined text is parsed as
/// an `f64` into a `FloatLiteral`. Without a period the text is parsed as a
/// `u64` into a `NatLiteral`. Errors from the standard-library number parsers
/// are reported against the first token of the literal.
#[recursive_descent_method]
fn float_literal(&mut self) -> ParseResult<Expression> {
    use self::ExpressionKind::*;
    // Remember where the literal starts for error reporting.
    let start = self.token_handler.peek();
    let mut text = self.digits()?;
    match self.token_handler.peek_kind() {
        Period => {
            self.token_handler.next();
            text.push('.');
            let fraction = self.digits()?;
            text.push_str(&fraction);
            match text.parse::<f64>() {
                Err(e) => ParseError::new_with_token(format!("Float failed to parse with error: {}", e), start),
                Ok(f) => Ok(Expression::new(self.id_store.fresh(), FloatLiteral(f))),
            }
        },
        _ => match text.parse::<u64>() {
            Err(e) => ParseError::new_with_token(format!("Integer failed to parse with error: {}", e), start),
            Ok(d) => Ok(Expression::new(self.id_store.fresh(), NatLiteral(d))),
        },
    }
}
/// Gathers a run of digit-group tokens into a single string.
///
/// Underscore tokens inside the run are consumed and skipped, so they act as
/// visual separators. Scanning stops, without consuming, at the first token
/// that is neither a digit group nor an underscore. The result is empty when
/// the run contains no digit groups.
#[recursive_descent_method]
fn digits(&mut self) -> ParseResult<String> {
    let mut collected = String::new();
    loop {
        match self.token_handler.peek_kind() {
            DigitGroup(ref group) => {
                self.token_handler.next();
                collected.push_str(group);
            },
            Underscore => {
                self.token_handler.next();
            },
            _ => break,
        }
    }
    Ok(collected)
}
}
/// Converts a string of binary digits into a `u64`.
///
/// `digits` is read most-significant digit first. An empty string yields `0`.
/// `tok` is only used to attach a source location to any error.
///
/// # Errors
/// - a character other than `'0'` or `'1'` is encountered;
/// - the value does not fit in a `u64`.
///
/// Overflow is checked on the accumulated value itself, so every value up to
/// `u64::MAX` (a full 64 significant digits) is accepted and leading zeros
/// never cause a spurious overflow error.
fn parse_binary(digits: String, tok: Token) -> ParseResult<u64> {
    let mut result: u64 = 0;
    for d in digits.chars() {
        let bit: u64 = match d {
            '0' => 0,
            '1' => 1,
            _ => return ParseError::new_with_token("Encountered a character not '1' or '0 while parsing a binary literal", tok),
        };
        // Shift the accumulator left by one bit and add the new digit,
        // failing only if the value itself no longer fits.
        result = match result.checked_mul(2).and_then(|shifted| shifted.checked_add(bit)) {
            Some(r) => r,
            None => return ParseError::new_with_token("This binary expression will overflow", tok),
        };
    }
    Ok(result)
}
/// Converts a string of hexadecimal digits into a `u64`.
///
/// `digits` is read most-significant digit first and may use upper- or
/// lower-case letters. An empty string yields `0`. `tok` is only used to
/// attach a source location to any error.
///
/// # Errors
/// - a character that is not a hexadecimal digit is encountered;
/// - the value does not fit in a `u64`.
///
/// Overflow is checked on the accumulated value itself, so every value up to
/// `u64::MAX` (a full 16 significant digits) is accepted and leading zeros
/// never cause a spurious overflow error.
fn parse_hex(digits: String, tok: Token) -> ParseResult<u64> {
    let mut result: u64 = 0;
    for d in digits.chars() {
        let nibble = match d.to_digit(16) {
            Some(n) => u64::from(n),
            None => return ParseError::new_with_token("Encountered a non-hex digit in a hex literal", tok),
        };
        // Shift the accumulator left by one hex digit and add the new one,
        // failing only if the value itself no longer fits.
        result = match result.checked_mul(16).and_then(|shifted| shifted.checked_add(nibble)) {
            Some(r) => r,
            None => return ParseError::new_with_token("This hex expression will overflow", tok),
        };
    }
    Ok(result)
}