#![allow(clippy::upper_case_acronyms)]
//! # Parsing
//! This module is where the recursive-descent parsing methods live.
//!
//!
//! # Schala EBNF Grammar
//! This document is the authoritative grammar of Schala, represented in something approximating
//! Extended Backus-Naur form. Terminal productions are in "double quotes", or UPPERCASE
//! if they represent a class of tokens rather than a specific string, or are otherwise
//! unrepresentable in ASCII.
//!
//! ## Top level structure
//!
//! ```text
//! program := (statement delimiter)* EOF
//! delimiter := NEWLINE | ";"
//! statement := expression | declaration | import | module
//! block := "{" (statement delimiter)* "}"
//! declaration := type_declaration | func_declaration | binding_declaration | impl_declaration
//! ```
//! ## Declarations
//!
//! ### Types
//! ```text
//! type_declaration := "type" type_declaration_body
//! type_declaration_body := "alias" type_alias | "mut"? type_singleton_name "=" type_body
//! type_alias := IDENTIFIER "=" type_name
//! type_body := variant_specifier ("|" variant_specifier)*
//! variant_specifier := IDENTIFIER | IDENTIFIER "{" typed_identifier_list "}" | IDENTIFIER "(" type_name* ")"
//! typed_identifier_list := typed_identifier*
//! typed_identifier := IDENTIFIER type_anno
//! ```
//! ### Functions
//!
//! ```text
//! func_declaration := func_signature func_body
//! func_body := ε | nonempty_func_body
//! nonempty_func_body := "{" (statement delimiter)* "}"
//! func_signature := "fn" func_name formal_param_list type_anno+
//! func_name := IDENTIFIER | operator
//! formal_param_list := "(" (formal_param ",")* ")"
//! formal_param := IDENTIFIER type_anno+
//! ```
//!
//! ### Variable bindings
//! ```text
//! binding_declaration := "let" "mut"? IDENTIFIER "=" expression
//! ```
//!
//! ### Interfaces
//!
//! ```text
//! interface_declaration := "interface" type_singleton_name signature_block
//!
//! impl_declaration := "impl" type_singleton_name decl_block | "impl" type_singleton_name "for" type_name decl_block
//! decl_block := "{" (func_declaration)* "}"
//! signature_block := "{" (func_signature)* "}"
//! ```
//!
//! ### Type Annotations
//!
//! ```text
//! type_anno := ":" type_name
//! type_name := type_singleton_name | "(" type_names ")"
//! type_names := ε | type_name (, type_name)*
//! type_singleton_name := IDENTIFIER (type_params)*
//! type_params := "<" type_name (, type_name)* ">"
//! ```
//!
//! ## Expressions
//! ```text
//! expression := precedence_expr type_anno+
//! precedence_expr := prefix_expr
//! prefix_expr := prefix_op call_expr
//! prefix_op := "+" | "-" | "!" | "~"
//! call_expr := index_expr ( "(" invocation_list ")" )* | ε
//! invocation_list := invocation_argument ("," invocation_argument)* | ε
//! invocation_argument := expression | IDENTIFIER "=" expression | "_"
//! index_expr := primary ( "[" (expression ("," (expression)* | ε) "]" )*
//! primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
//! expr_or_block := "{" (statement delimiter)* "}" | expr
//! ```
//!
//! ### Primary expressions
//!
//! ```text
//! list_expr := "[" (expression, ",")* "]"
//! lambda_expr := "\\" lambda_param_list type_anno+ nonempty_func_body
//! lambda_param_list := formal_param_list | formal_param
//! paren_expr := "(" paren_inner ")"
//! paren_inner := (expression ",")*
//! identifier_expr := qualified_identifier | named_struct
//! qualified_identifier := IDENTIFIER ("::" IDENTIFIER)*
//! ```
//!
//! ## Literals
//! ```text
//! literal := "true" | "false" | number_literal | STR_LITERAL
//! named_struct := qualified_identifier record_block
//! record_block := "{" (record_entry, ",")* "}" //TODO support anonymous structs, update syntax
//! record_entry := IDENTIFIER ":" expression
//! anonymous_struct := TODO
//! number_literal := int_literal | float_literal
//!
//! int_literal := ("0x" | "0b") digits
//! float_literal := digits ("." digits)
//! digits := (digit_group underscore*)+
//! digit_group := DIGIT+
//! ```
//!
//! ### Patterns
//! ```text
//! pattern := "(" (pattern, ",")* ")" | simple_pattern
//! simple_pattern := pattern_literal | record_pattern | tuple_struct_pattern
//! pattern_literal := "true" | "false" | signed_number_literal | STR_LITERAL | qualified_identifier
//! signed_number_literal := "-"? number_literal
//! record_pattern := qualified_identifier "{" (record_pattern_entry, ",")* "}"
//! record_pattern_entry := IDENTIFIER | IDENTIFIER ":" Pattern
//! tuple_struct_pattern := qualified_identifier "(" (pattern, ",")* ")"
//! ```
//! ### If expressions
//!
//! TODO: it would be nice if the grammar could capture an incomplete precedence expr in the
//! discriminator
//!
//! ```text
//! if_expr := "if" discriminator if_expr_body
//! if_expr_body := ("then" simple_conditional | "is" simple_pattern_match | cond_block)
//! discriminator := ε | expression
//! simple_conditional := expr_or_block else_case
//! simple_pattern_match := pattern "then" simple_conditional
//! else_case := "else" expr_or_block
//!
//! cond_block := "{" (cond_arm comma_or_delimiter)* "}"
//! cond_arm := condition guard "then" expr_or_block | "else" expr_or_block
//! condition := "is" pattern | operator precedence_expr | expression
//! guard := "if" expression
//! comma_or_delimiter := "," | delimiter
//! ```
//!
//!
//! ### While expressions
//! ```text
//! while_expr := "while" while_cond "{" (statement delimiter)* "}"
//! while_cond := ε | expression | expression "is" pattern //TODO maybe is-expressions should be primary
//! ```
//!
//!
//! //TODO this implies there must be at least one enumerator, which the parser doesn't support right
//! //this second, and maybe should fail later anyway
//! ### For-expressions
//! ```text
//! for_expr := "for" (enumerator | "{" enumerators "}") for_expr_body
//!
for_expr_body := "return" expression | "{" (statement delimiter)* "}" //! enumerators := enumerator ("," enumerators)* //! enumerator := identifier "<-" expression | identifier "=" expression //TODO add guards, etc. //! ``` //! ## Imports //! ```text //! import := 'import' IDENTIFIER (:: IDENTIFIER)* import_suffix //! import_suffix := ε | '::{' IDENTIFIER (, IDENTIFIER)* '}' | '*' //TODO add qualified, exclusions, etc. //! //! ## Modules //! //! module := 'module' IDENTIFIER '{' statement* '}' //! ``` mod test; use std::rc::Rc; use crate::tokenizing::*; use crate::tokenizing::Kw::*; use crate::tokenizing::TokenKind::*; use crate::tokenizing::Location; use crate::ast::*; use crate::identifier::IdStore; /// Represents a parsing error #[derive(Debug)] pub struct ParseError { pub production_name: Option, pub msg: String, pub token: Token } impl ParseError { fn new_with_token(msg: M, token: Token) -> ParseResult where M: Into { Err(ParseError { msg: msg.into(), token, production_name: None }) } } /// Represents either a successful parsing result or a ParseError pub type ParseResult = Result; #[derive(Debug)] pub struct ParseRecord { production_name: String, next_token: String, level: u32, } /// Main data structure for doing parsing. 
pub struct Parser {
  token_handler: TokenHandler,
  /// Trace entries pushed while parsing; rendered by `format_parse_trace`.
  parse_record: Vec<ParseRecord>,
  /// Current nesting depth; stored as the `level` of each new trace entry.
  parse_level: u32,
  restrictions: ParserRestrictions,
  id_store: IdStore,
}

/// Context-dependent switches consulted by individual parsing methods.
struct ParserRestrictions {
  /// When true, `identifier_expr` does not treat a `{` after a name as the
  /// start of a record block.
  no_struct_literal: bool
}

/// A cursor over a fixed token vector. Reading at or past the end yields an
/// `EOF` token positioned at the location of the last real token.
struct TokenHandler {
  tokens: Vec<Token>,
  idx: usize,
  end_of_file: Location
}

impl TokenHandler {
  fn new(tokens: Vec<Token>) -> TokenHandler {
    // With no tokens at all there is no last location to borrow, so use 0:0.
    let end_of_file = tokens.last()
      .map(|t| t.location)
      .unwrap_or(Location { line_num: 0, char_num: 0 });
    TokenHandler { idx: 0, tokens, end_of_file }
  }

  /// The synthetic token handed out once the real tokens are exhausted.
  fn eof_token(&self) -> Token {
    Token { kind: TokenKind::EOF, location: self.end_of_file }
  }

  /// Kind of the next token, without consuming it.
  fn peek_kind(&self) -> TokenKind {
    self.peek().kind
  }

  /// Kind of the token `n` positions ahead, without consuming anything.
  fn peek_kind_n(&self, n: usize) -> TokenKind {
    self.peek_n(n).kind
  }

  /// The next token, without consuming it.
  fn peek(&self) -> Token {
    self.peek_n(0)
  }

  /// calling peek_n(0) is the same thing as peek()
  fn peek_n(&self, n: usize) -> Token {
    self.tokens.get(self.idx + n).cloned().unwrap_or_else(|| self.eof_token())
  }

  /// Consume and return the next token. At end of input this keeps returning
  /// `EOF` and leaves the cursor where it is, so `idx` never runs past the end.
  fn next(&mut self) -> Token {
    let tok = self.peek();
    if self.idx < self.tokens.len() {
      self.idx += 1;
    }
    tok
  }
}

impl Parser {
  /// Create a new parser with an empty token stream; load tokens with
  /// `add_new_tokens` before calling `parse`.
  pub fn new() -> Parser {
    Parser {
      token_handler: TokenHandler::new(vec![]),
      parse_record: vec![],
      parse_level: 0,
      restrictions: ParserRestrictions { no_struct_literal: false },
      id_store: IdStore::new(),
    }
  }

  /// Replace the parser's token stream with `new_tokens`. Despite the name,
  /// any tokens left unread from an earlier call are discarded.
  pub fn add_new_tokens(&mut self, new_tokens: Vec<Token>) {
    self.token_handler = TokenHandler::new(new_tokens);
  }

  /// Parse all loaded tokens up to this point.
pub fn parse(&mut self) -> ParseResult<AST> {
    self.program()
  }

  /// Render the recorded parse trace as text: a header line, then one line per
  /// record consisting of `level` dots, the production name in backticks, and,
  /// only when it differs from the previously printed one, the next token.
  #[allow(dead_code)]
  pub fn format_parse_trace(&self) -> String {
    let mut trace = String::from("Parse productions:\n");
    let mut last_token: Option<&String> = None;
    for record in &self.parse_record {
      trace.push_str(&".".repeat(record.level as usize));
      trace.push('`');
      trace.push_str(&record.production_name);
      trace.push('`');
      // Repeating an unchanged lookahead token on every line is just noise.
      if last_token != Some(&record.next_token) {
        last_token = Some(&record.next_token);
        trace.push_str(", next token: ");
        trace.push_str(&record.next_token);
      }
      trace.push('\n');
    }
    trace
  }
}

// Turns a token pattern into its source text, for use in error messages.
macro_rules! print_token_pattern {
  ($tokenpattern:pat) => { stringify!($tokenpattern) }
}

// Peeks at the next token. If its kind matches the pattern (and the optional
// `if` guard) the token is consumed and becomes the macro's value; otherwise
// the *enclosing function* returns a `ParseError` naming what was expected.
macro_rules! expect {
  ($self:expr, $token_kind:pat) => { expect!($self, $token_kind if true) };
  ($self:expr, $expected_kind:pat if $cond:expr) => {
    {
      let tok = $self.token_handler.peek();
      match tok.get_kind() {
        $expected_kind if $cond => $self.token_handler.next(),
        actual_kind => {
          let msg = format!("Expected {}, got {:?}", print_token_pattern!($expected_kind), actual_kind);
          return ParseError::new_with_token(msg, tok);
        }
      }
    }
  }
}

// (the name and rules of the next macro continue immediately below)
macro_rules!
delimited { ($self:expr, $start:pat, $parse_fn:ident, $delim:pat, $end:pat, nonstrict) => { delimited!($self, $start, $parse_fn, $delim, $end, false) }; ($self:expr, $start:pat, $parse_fn:ident, $delim:pat, $end:pat) => { delimited!($self, $start, $parse_fn, $delim, $end, true) }; ($self:expr, $start:pat, $parse_fn:ident, $delim:pat, $end:pat, $strictness:expr) => { { expect!($self, $start); let mut acc = vec![]; loop { let peek = $self.token_handler.peek(); match peek.get_kind() { $end | EOF => break, _ => (), } if !$strictness { match peek.get_kind() { $delim => { $self.token_handler.next(); continue }, _ => () } } acc.push($self.$parse_fn()?); match $self.token_handler.peek().get_kind() { $delim => { $self.token_handler.next(); continue }, _ if $strictness => break, _ => continue, }; } expect!($self, $end); acc } }; } impl Parser { /// `program := (statement delimiter)* EOF` /// `delimiter := NEWLINE | ';'` #[recursive_descent_method] fn program(&mut self) -> ParseResult { let mut statements = Vec::new(); loop { match self.token_handler.peek().get_kind() { EOF => break, Newline | Semicolon => { self.token_handler.next(); continue; }, _ => statements.push( self.statement()? 
), } } Ok(AST { id: self.id_store.fresh(), statements: statements.into() }) } /// `statement := expression | declaration` #[recursive_descent_method] fn statement(&mut self) -> ParseResult { //TODO handle error recovery here let tok = self.token_handler.peek(); let kind = match tok.get_kind() { AtSign => self.annotation().map(StatementKind::Declaration), Keyword(Type) => self.type_declaration().map(|decl| { StatementKind::Declaration(decl) }), Keyword(Func)=> self.func_declaration().map(|func| { StatementKind::Declaration(func) }), Keyword(Let) => self.binding_declaration().map(StatementKind::Declaration), Keyword(Interface) => self.interface_declaration().map(StatementKind::Declaration), Keyword(Impl) => self.impl_declaration().map(StatementKind::Declaration), Keyword(Import) => self.import_declaration().map(StatementKind::Import), Keyword(Module) => self.module_declaration().map(StatementKind::Module), _ => self.expression().map(|expr| { StatementKind::Expression(expr) } ), }?; let id = self.id_store.fresh(); Ok(Statement { kind, id, location: tok.location }) } #[recursive_descent_method] fn annotation(&mut self) -> ParseResult { expect!(self, AtSign); let name = self.identifier()?; let arguments = if let LParen = self.token_handler.peek().get_kind() { delimited!(self, LParen, expression, Comma, RParen) } else { vec![] }; Ok(Declaration::Annotation { name, arguments, }) } #[recursive_descent_method] fn type_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Type)); self.type_declaration_body() } #[recursive_descent_method] fn type_declaration_body(&mut self) -> ParseResult { if let Keyword(Alias) = self.token_handler.peek_kind() { self.type_alias() } else { let mutable = if let Keyword(Mut) = self.token_handler.peek_kind() { self.token_handler.next(); true } else { false }; let name = self.type_singleton_name()?; expect!(self, Equals); let body = self.type_body()?; Ok(Declaration::TypeDecl { name, body, mutable}) } } #[recursive_descent_method] fn 
type_alias(&mut self) -> ParseResult { expect!(self, Keyword(Alias)); let alias = self.identifier()?; expect!(self, Equals); let original = self.identifier()?; Ok(Declaration::TypeAlias { alias, original }) } #[recursive_descent_method] fn type_body(&mut self) -> ParseResult { let mut variants = vec![self.variant_specifier()?]; while let Pipe = self.token_handler.peek_kind() { self.token_handler.next(); variants.push(self.variant_specifier()?); } Ok(TypeBody(variants)) } #[recursive_descent_method] fn variant_specifier(&mut self) -> ParseResult { let name = self.identifier()?; let kind = match self.token_handler.peek_kind() { LParen => { let tuple_members = delimited!(self, LParen, type_name, Comma, RParen); VariantKind::TupleStruct(tuple_members) }, LCurlyBrace => { let typed_identifier_list = delimited!(self, LCurlyBrace, typed_identifier, Comma, RCurlyBrace); VariantKind::Record(typed_identifier_list) }, _ => VariantKind::UnitStruct }; Ok(Variant { id: self.id_store.fresh(), name, kind }) } #[recursive_descent_method] fn typed_identifier(&mut self) -> ParseResult<(Rc, TypeIdentifier)> { let identifier = self.identifier()?; expect!(self, Colon); let type_name = self.type_name()?; Ok((identifier, type_name)) } #[recursive_descent_method] fn func_declaration(&mut self) -> ParseResult { let signature = self.func_signature()?; if let LCurlyBrace = self.token_handler.peek_kind() { let statements = self.nonempty_func_body()?.into(); Ok(Declaration::FuncDecl(signature, statements)) } else { Ok(Declaration::FuncSig(signature)) } } #[recursive_descent_method] fn func_signature(&mut self) -> ParseResult { expect!(self, Keyword(Func)); let (name, operator) = match self.token_handler.peek_kind() { Operator(s) => { let name = s; self.token_handler.next(); (name, true) }, _ => (self.identifier()?, false) }; let params = self.formal_param_list()?; let type_anno = match self.token_handler.peek_kind() { Colon => Some(self.type_anno()?), _ => None, }; Ok(Signature { name, 
operator, params, type_anno }) } #[recursive_descent_method] fn nonempty_func_body(&mut self) -> ParseResult> { let statements = delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict); Ok(statements) } #[recursive_descent_method] fn formal_param_list(&mut self) -> ParseResult> { Ok(delimited!(self, LParen, formal_param, Comma, RParen)) } #[recursive_descent_method] fn formal_param(&mut self) -> ParseResult { let name = self.identifier()?; let anno = match self.token_handler.peek_kind() { Colon => Some(self.type_anno()?), _ => None }; let default = match self.token_handler.peek_kind() { Equals => { self.token_handler.next(); Some(self.expression()?) }, _ => None }; Ok(FormalParam { name, anno, default }) } #[recursive_descent_method] fn binding_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Kw::Let)); let constant = match self.token_handler.peek_kind() { Keyword(Kw::Mut) => { self.token_handler.next(); false } _ => true }; let name = self.identifier()?; let type_anno = if let Colon = self.token_handler.peek_kind() { Some(self.type_anno()?) } else { None }; expect!(self, Equals); let expr = self.expression()?; Ok(Declaration::Binding { name, constant, type_anno, expr }) } #[recursive_descent_method] fn interface_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Interface)); let name = self.identifier()?; let signatures = self.signature_block()?; Ok(Declaration::Interface { name, signatures }) } #[recursive_descent_method] fn signature_block(&mut self) -> ParseResult> { Ok(delimited!(self, LCurlyBrace, func_signature, Newline | Semicolon, RCurlyBrace, nonstrict)) } #[recursive_descent_method] fn impl_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Impl)); let first = self.type_singleton_name()?; let second = if let Keyword(For) = self.token_handler.peek_kind() { self.token_handler.next(); Some(self.type_name()?) 
} else { None }; let block = self.decl_block()?; Ok(match (first, second) { (interface_name, Some(type_name)) => Declaration::Impl { type_name, interface_name: Some(interface_name), block }, (type_singleton_name, None) => Declaration::Impl { type_name: TypeIdentifier::Singleton(type_singleton_name), interface_name: None, block } }) } #[recursive_descent_method] fn decl_block(&mut self) -> ParseResult> { Ok(delimited!(self, LCurlyBrace, func_declaration, Newline | Semicolon, RCurlyBrace, nonstrict)) } #[recursive_descent_method] fn expression(&mut self) -> ParseResult { let mut expr_body = self.precedence_expr(BinOp::min_precedence())?; let type_anno = match self.token_handler.peek_kind() { Colon => Some(self.type_anno()?), _ => None }; if expr_body.type_anno.is_some() { return ParseError::new_with_token("Bad parse state encountered", self.token_handler.peek()); } expr_body.type_anno = type_anno; Ok(expr_body) } #[recursive_descent_method] fn type_anno(&mut self) -> ParseResult { expect!(self, Colon); self.type_name() } #[recursive_descent_method] fn type_name(&mut self) -> ParseResult { use self::TypeIdentifier::*; Ok(match self.token_handler.peek_kind() { LParen => Tuple(delimited!(self, LParen, type_name, Comma, RParen)), _ => Singleton(self.type_singleton_name()?), }) } #[recursive_descent_method] fn type_singleton_name(&mut self) -> ParseResult { Ok(TypeSingletonName { name: self.identifier()?, params: match self.token_handler.peek_kind() { LAngleBracket => delimited!(self, LAngleBracket, type_name, Comma, RAngleBracket), _ => vec![], } }) } // this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ #[allow(clippy::while_let_loop)] fn precedence_expr(&mut self, precedence: i32) -> ParseResult { let record = ParseRecord { production_name: "precedence_expr".to_string(), next_token: self.token_handler.peek().to_string_with_metadata(), level: self.parse_level, }; self.parse_level += 1; 
self.parse_record.push(record); let mut lhs = self.prefix_expr()?; loop { let new_precedence = match BinOp::get_precedence_from_token(&self.token_handler.peek_kind()) { Some(p) => p, None => break, }; if precedence >= new_precedence { break; } let next_tok = self.token_handler.next(); let operation = match BinOp::from_sigil_token(&next_tok.kind) { Some(sigil) => sigil, //TODO I think I can fix this unreachable None => unreachable!() }; let rhs = self.precedence_expr(new_precedence)?; lhs = Expression::new(self.id_store.fresh(), ExpressionKind::BinExp(operation, Box::new(lhs), Box::new(rhs))); } self.parse_level -= 1; Ok(lhs) } #[recursive_descent_method] fn prefix_expr(&mut self) -> ParseResult { match self.token_handler.peek_kind() { Operator(ref op) if PrefixOp::is_prefix(&*op) => { let sigil = match self.token_handler.next().kind { Operator(op) => op, _ => unreachable!(), }; let expr = self.primary()?; let prefix_op = PrefixOp::from_sigil(sigil.as_str()); Ok(Expression::new( self.id_store.fresh(), ExpressionKind::PrefixExp(prefix_op, Box::new(expr)) )) }, _ => self.call_expr() } } #[recursive_descent_method] fn call_expr(&mut self) -> ParseResult { let mut expr = self.index_expr()?; //TODO look at this while let LParen = self.token_handler.peek_kind() { let arguments = delimited!(self, LParen, invocation_argument, Comma, RParen); expr = Expression::new(self.id_store.fresh(), ExpressionKind::Call { f: Box::new(expr), arguments }); //TODO no type anno is incorrect } Ok(expr) } #[recursive_descent_method] fn invocation_argument(&mut self) -> ParseResult { Ok(match self.token_handler.peek_kind() { Underscore => { self.token_handler.next(); InvocationArgument::Ignored }, Identifier(s) => { match self.token_handler.peek_kind_n(1) { Equals => { self.token_handler.next(); self.token_handler.next(); let expr = self.expression()?; InvocationArgument::Keyword { name: s, expr } }, _ => { let expr = self.expression()?; InvocationArgument::Positional(expr) } } }, _ => 
InvocationArgument::Positional(self.expression()?) }) } #[recursive_descent_method] fn index_expr(&mut self) -> ParseResult { let primary = self.primary()?; Ok(if let LSquareBracket = self.token_handler.peek_kind() { let indexers = delimited!(self, LSquareBracket, expression, Comma, RSquareBracket); Expression::new(self.id_store.fresh(), ExpressionKind::Index { indexee: Box::new(Expression::new(self.id_store.fresh(), primary.kind)), indexers, }) } else { primary }) } #[recursive_descent_method] fn primary(&mut self) -> ParseResult { match self.token_handler.peek_kind() { LCurlyBrace => self.curly_brace_expr(), Backslash => self.lambda_expr(), LParen => self.paren_expr(), LSquareBracket => self.list_expr(), Keyword(Kw::If) => self.if_expr(), Keyword(Kw::For) => self.for_expr(), Keyword(Kw::While) => self.while_expr(), Identifier(_) => self.identifier_expr(), _ => self.literal(), } } #[recursive_descent_method] fn list_expr(&mut self) -> ParseResult { let exprs = delimited!(self, LSquareBracket, expression, Comma, RSquareBracket); Ok(Expression::new(self.id_store.fresh(), ExpressionKind::ListLiteral(exprs))) } #[recursive_descent_method] fn curly_brace_expr(&mut self) -> ParseResult { ParseError::new_with_token("Not implemented", self.token_handler.peek()) } #[recursive_descent_method] fn lambda_expr(&mut self) -> ParseResult { expect!(self, Backslash); let params = self.lambda_param_list()?; let type_anno = match self.token_handler.peek_kind() { Colon => Some(self.type_anno()?), _ => None, }; let body = self.nonempty_func_body()?.into(); Ok(Expression::new(self.id_store.fresh(), ExpressionKind::Lambda { params, type_anno, body })) //TODO need to handle types somehow } #[recursive_descent_method] fn lambda_param_list(&mut self) -> ParseResult> { if let LParen = self.token_handler.peek_kind() { self.formal_param_list() } else { let single_param = self.formal_param()?; Ok(vec![single_param]) } } #[recursive_descent_method] fn paren_expr(&mut self) -> ParseResult { use 
self::ExpressionKind::*; // completes the `use` that opens `paren_expr`
    // Inside parentheses a struct literal is unambiguous again, so lift the
    // restriction for the enclosed expressions.
    let old_struct_value = self.restrictions.no_struct_literal;
    self.restrictions.no_struct_literal = false;
    let inner = self.paren_inner();
    // Restore before propagating an error: `delimited!` returns early on
    // failure, which previously skipped this line.
    self.restrictions.no_struct_literal = old_struct_value;
    let mut inner = inner?;
    match inner.len() {
      0 => Ok(Expression::new(self.id_store.fresh(), TupleLiteral(vec![]))),
      // a single parenthesized expression is just that expression
      1 => Ok(inner.pop().unwrap()),
      _ => Ok(Expression::new(self.id_store.fresh(), TupleLiteral(inner))),
    }
  }

  /// Parses `"(" (expression ",")* ")"`. Kept separate so that an early return
  /// from `delimited!` lands back in `paren_expr`, which restores its state.
  fn paren_inner(&mut self) -> ParseResult<Vec<Expression>> {
    Ok(delimited!(self, LParen, expression, Comma, RParen))
  }

  /// `identifier_expr := qualified_identifier | named_struct`
  ///
  /// A `{` after the name starts a record block unless `no_struct_literal` is set.
  #[recursive_descent_method]
  fn identifier_expr(&mut self) -> ParseResult<Expression> {
    use self::ExpressionKind::*;
    let components = self.qualified_identifier()?;
    let qualified_identifier = QualifiedName { id: self.id_store.fresh(), components };
    Ok(match self.token_handler.peek_kind() {
      LCurlyBrace if !self.restrictions.no_struct_literal => {
        let fields = self.record_block()?;
        Expression::new(self.id_store.fresh(), NamedStruct { name: qualified_identifier, fields })
      },
      _ => Expression::new(self.id_store.fresh(), Value(qualified_identifier))
    })
  }

  /// `qualified_identifier := IDENTIFIER ("::" IDENTIFIER)*`
  #[recursive_descent_method]
  fn qualified_identifier(&mut self) -> ParseResult<Vec<Rc<String>>> {
    let mut components = vec![self.identifier()?];
    // `::` arrives as two consecutive `Colon` tokens
    while let (Colon, Colon) = (self.token_handler.peek_kind(), self.token_handler.peek_kind_n(1)) {
      self.token_handler.next();
      self.token_handler.next();
      components.push(self.identifier()?);
    }
    Ok(components)
  }

  /// `record_block := "{" (record_entry, ",")* "}"`
  #[recursive_descent_method]
  fn record_block(&mut self) -> ParseResult<Vec<(Rc<String>, Expression)>> {
    Ok(delimited!(self, LCurlyBrace, record_entry, Comma, RCurlyBrace))
  }

  /// `record_entry := IDENTIFIER ":" expression`
  #[recursive_descent_method]
  fn record_entry(&mut self) -> ParseResult<(Rc<String>, Expression)> {
    let field_name = self.identifier()?;
    expect!(self, Colon);
    let value = self.expression()?;
    Ok((field_name, value))
  }

  /// `if_expr := "if" discriminator if_expr_body`
  ///
  /// Struct literals are disabled while the discriminator and body are parsed,
  /// so that a `{` after an identifier is not read as a record block. The
  /// previous setting is restored on success *and* on failure.
  #[recursive_descent_method]
  fn if_expr(&mut self) -> ParseResult<Expression> {
    expect!(self, Keyword(Kw::If));
    let old_struct_value = self.restrictions.no_struct_literal;
    self.restrictions.no_struct_literal = true;
    // Collect both parts as a Result instead of using `?` here, so the
    // restriction is restored even when parsing fails.
    let discriminator = match self.token_handler.peek_kind() {
      LCurlyBrace => Ok(None),
      _ => self.expression().map(|expr| Some(Box::new(expr))),
    };
    let parts = discriminator.and_then(|discriminator| {
      self.if_expr_body().map(|body| (discriminator, Box::new(body)))
    });
    self.restrictions.no_struct_literal = old_struct_value;
    let (discriminator, body) = parts?;
    Ok(Expression::new(self.id_store.fresh(), ExpressionKind::IfExpression { discriminator, body }))
  }

  /// `if_expr_body := ("then" simple_conditional | "is" simple_pattern_match | cond_block)`
  #[recursive_descent_method]
  fn if_expr_body(&mut self) -> ParseResult<IfExpressionBody> {
    match self.token_handler.peek_kind() {
      Keyword(Kw::Then) => self.simple_conditional(),
      Keyword(Kw::Is) => self.simple_pattern_match(),
      _ => self.cond_block(),
    }
  }

  /// Parses `"then" expr_or_block` followed by an optional else case.
  #[recursive_descent_method]
  fn simple_conditional(&mut self) -> ParseResult<IfExpressionBody> {
    expect!(self, Keyword(Kw::Then));
    let then_case = self.expr_or_block()?;
    let else_case = self.else_case()?;
    Ok(IfExpressionBody::SimpleConditional {then_case, else_case })
  }

  /// Parses `"is" pattern "then" expr_or_block` followed by an optional else case.
  #[recursive_descent_method]
  fn simple_pattern_match(&mut self) -> ParseResult<IfExpressionBody> {
    expect!(self, Keyword(Kw::Is));
    let pattern = self.pattern()?;
    expect!(self, Keyword(Kw::Then));
    let then_case = self.expr_or_block()?;
    let else_case = self.else_case()?;
    Ok(IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case })
  }

  /// Parses an optional `"else" expr_or_block`.
  #[recursive_descent_method]
  fn else_case(&mut self) -> ParseResult<Option<Block>> {
    Ok(if let Keyword(Kw::Else) = self.token_handler.peek_kind() {
      self.token_handler.next();
      Some(self.expr_or_block()?)
} else { None }) } #[recursive_descent_method] fn cond_block(&mut self) -> ParseResult { expect!(self, LCurlyBrace); let mut cond_arms = vec![]; loop { match self.token_handler.peek_kind() { RCurlyBrace | EOF => break, Semicolon | Newline => { self.token_handler.next(); continue}, _ => { cond_arms.push(self.cond_arm()?); match self.token_handler.peek_kind() { Comma | Semicolon | Newline => { self.token_handler.next(); continue; }, _ => break, } } } } expect!(self, RCurlyBrace); Ok(IfExpressionBody::CondList(cond_arms)) } #[recursive_descent_method] fn cond_arm(&mut self) -> ParseResult { let (condition, guard) = if let Keyword(Kw::Else) = self.token_handler.peek_kind() { self.token_handler.next(); (Condition::Else, None) } else { let condition = self.condition()?; let guard = self.guard()?; expect!(self, Keyword(Kw::Then)); (condition, guard) }; let body = self.expr_or_block()?; Ok(ConditionArm { condition, guard, body }) } #[recursive_descent_method] fn condition(&mut self) -> ParseResult { Ok(match self.token_handler.peek_kind() { Keyword(Kw::Is) => { self.token_handler.next(); Condition::Pattern(self.pattern()?) }, ref tok if BinOp::from_sigil_token(tok).is_some() => { let op = BinOp::from_sigil_token(&self.token_handler.next().kind).unwrap(); let expr = self.expression()?; Condition::TruncatedOp(op, expr) }, _ => { Condition::Expression(self.expression()?) }, }) } #[recursive_descent_method] fn guard(&mut self) -> ParseResult> { Ok(match self.token_handler.peek_kind() { Keyword(Kw::If) => { self.token_handler.next(); Some(self.expression()?) 
}, _ => None }) } #[recursive_descent_method] fn pattern(&mut self) -> ParseResult { if let LParen = self.token_handler.peek_kind() { let tuple_pattern_variants = delimited!(self, LParen, pattern, Comma, RParen); Ok(Pattern::TuplePattern(tuple_pattern_variants)) } else { self.simple_pattern() } } #[recursive_descent_method] fn simple_pattern(&mut self) -> ParseResult { Ok(match self.token_handler.peek_kind() { Identifier(_) => { let components = self.qualified_identifier()?; let qualified_identifier = QualifiedName { id: self.id_store.fresh(), components }; match self.token_handler.peek_kind() { LCurlyBrace => { let members = delimited!(self, LCurlyBrace, record_pattern_entry, Comma, RCurlyBrace); Pattern::Record(qualified_identifier, members) }, LParen => { let members = delimited!(self, LParen, pattern, Comma, RParen); Pattern::TupleStruct(qualified_identifier, members) }, _ => { Pattern::VarOrName(qualified_identifier) }, } }, _ => self.pattern_literal()? }) } #[recursive_descent_method] fn pattern_literal(&mut self) -> ParseResult { let tok = self.token_handler.peek(); Ok(match tok.get_kind() { Keyword(Kw::True) => { self.token_handler.next(); Pattern::Literal(PatternLiteral::BoolPattern(true)) }, Keyword(Kw::False) => { self.token_handler.next(); Pattern::Literal(PatternLiteral::BoolPattern(false)) }, StrLiteral { s, .. } => { self.token_handler.next(); Pattern::Literal(PatternLiteral::StringPattern(s)) }, DigitGroup(_) | HexLiteral(_) | BinNumberSigil | Period => self.signed_number_literal()?, Operator(ref op) if **op == "-" => self.signed_number_literal()?, Underscore => { self.token_handler.next(); Pattern::Ignored }, other => return ParseError::new_with_token(format!("{:?} is not a valid Pattern", other), tok) }) } #[recursive_descent_method] fn signed_number_literal(&mut self) -> ParseResult { let neg = match self.token_handler.peek_kind() { Operator(ref op) if **op == "-" => { self.token_handler.next(); true }, _ => false }; let Expression { kind, .. 
} = self.number_literal()?; // finishes the `let Expression { kind, ..` destructuring begun above
    Ok(Pattern::Literal(PatternLiteral::NumPattern { neg, num: kind }))
  }

  /// `record_pattern_entry := IDENTIFIER | IDENTIFIER ":" Pattern`
  ///
  /// A bare identifier yields the name paired with a string pattern of that name.
  #[recursive_descent_method]
  fn record_pattern_entry(&mut self) -> ParseResult<(Rc<String>, Pattern)> {
    let name = self.identifier()?;
    Ok(match self.token_handler.peek_kind() {
      Colon => {
        expect!(self, Colon);
        let pat = self.pattern()?;
        (name, pat)
      },
      _ => (name.clone(), Pattern::Literal(PatternLiteral::StringPattern(name)))
    })
  }

  /// `block := "{" (statement delimiter)* "}"`
  #[recursive_descent_method]
  fn block(&mut self) -> ParseResult<Block> {
    let block = delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict);
    Ok(block.into())
  }

  /// `expr_or_block := "{" (statement delimiter)* "}" | expr`
  ///
  /// A lone expression is wrapped in a single expression statement.
  #[recursive_descent_method]
  fn expr_or_block(&mut self) -> ParseResult<Block> {
    let tok = self.token_handler.peek();
    match tok.get_kind() {
      LCurlyBrace => self.block(),
      _ => {
        let expr = self.expression()?;
        let s = Statement { id: self.id_store.fresh(), location: tok.location, kind: StatementKind::Expression(expr) };
        Ok(s.into())
      }
    }
  }

  /// `while_expr := "while" while_cond "{" (statement delimiter)* "}"`
  ///
  /// Struct literals are disabled while the condition is parsed, then the
  /// *previous* setting is restored (not unconditionally `false`), so a
  /// `while` nested in an `if` discriminator does not re-enable them.
  #[recursive_descent_method]
  fn while_expr(&mut self) -> ParseResult<Expression> {
    use self::ExpressionKind::*;
    expect!(self, Keyword(Kw::While));
    let condition = {
      let old_struct_value = self.restrictions.no_struct_literal;
      self.restrictions.no_struct_literal = true;
      let x = self.while_cond();
      self.restrictions.no_struct_literal = old_struct_value;
      x?.map(Box::new)
    };
    let body = self.block()?;
    Ok(Expression::new(self.id_store.fresh(), WhileExpression {condition, body}))
  }

  /// Parses the optional `while` condition; an immediate `{` means none.
  #[recursive_descent_method]
  fn while_cond(&mut self) -> ParseResult<Option<Expression>> {
    Ok(match self.token_handler.peek_kind() {
      LCurlyBrace => None,
      _ => Some(self.expression()?),
    })
  }

  /// `for_expr := "for" (enumerator | "{" enumerators "}") for_expr_body`
  ///
  /// For the unbraced single-enumerator form, struct literals are disabled
  /// while the enumerator is parsed and the previous setting is restored after.
  #[recursive_descent_method]
  fn for_expr(&mut self) -> ParseResult<Expression> {
    expect!(self, Keyword(Kw::For));
    let enumerators = if let LCurlyBrace = self.token_handler.peek_kind() {
      delimited!(self, LCurlyBrace, enumerator, Comma | Newline, RCurlyBrace)
    } else {
      let single_enum = {
        let old_struct_value = self.restrictions.no_struct_literal;
        self.restrictions.no_struct_literal = true;
        let s = self.enumerator();
        self.restrictions.no_struct_literal = old_struct_value;
        s?
      };
      vec![single_enum]
    };
    let body = Box::new(self.for_expr_body()?);
    Ok(Expression::new(self.id_store.fresh(), ExpressionKind::ForExpression { enumerators, body }))
  }

  /// Parses `identifier "<-" expression`.
  // NOTE(review): the module grammar also lists `identifier "=" expression`,
  // which is not accepted here.
  #[recursive_descent_method]
  fn enumerator(&mut self) -> ParseResult<Enumerator> {
    let id = self.identifier()?;
    expect!(self, Operator(ref c) if **c == "<-");
    let generator = self.expression()?;
    Ok(Enumerator { id, generator })
  }

  /// `for_expr_body := "return" expression | "{" (statement delimiter)* "}"`
  #[recursive_descent_method]
  fn for_expr_body(&mut self) -> ParseResult<ForBody> {
    use self::ForBody::*;
    let tok = self.token_handler.peek();
    Ok(match tok.get_kind() {
      LCurlyBrace => {
        let statements = delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict);
        StatementBlock(statements.into())
      },
      Keyword(Kw::Return) => {
        self.token_handler.next();
        MonadicReturn(self.expression()?)
      },
      _ => return ParseError::new_with_token("for expressions must end in a block or 'return'", tok),
    })
  }

  /// Consumes the next token and returns its name if it is an identifier.
  /// The token is consumed even when it is not one.
  #[recursive_descent_method]
  fn identifier(&mut self) -> ParseResult<Rc<String>> {
    let tok = self.token_handler.next();
    match tok.get_kind() {
      Identifier(s) => Ok(s),
      p => ParseError::new_with_token(format!("Expected an identifier, got {:?}", p), tok),
    }
  }

  /// `literal := "true" | "false" | number_literal | STR_LITERAL`
  #[recursive_descent_method]
  fn literal(&mut self) -> ParseResult<Expression> {
    use self::ExpressionKind::*;

    let tok = self.token_handler.peek();
    match tok.get_kind() {
      DigitGroup(_) | HexLiteral(_) | BinNumberSigil | Period => self.number_literal(),
      Keyword(Kw::True) => {
        self.token_handler.next();
        let id = self.id_store.fresh();
        Ok(Expression::new(id, BoolLiteral(true)))
      },
      Keyword(Kw::False) => {
        self.token_handler.next();
        let id = self.id_store.fresh();
        Ok(Expression::new(id, BoolLiteral(false)))
      },
      StrLiteral {s, ..} => {
        self.token_handler.next();
        let id = self.id_store.fresh();
        Ok(Expression::new(id, StringLiteral(s)))
      }
      e => ParseError::new_with_token(format!("Expected a literal expression, got {:?}", e), tok),
    }
  }

  /// `number_literal := int_literal | float_literal`
  #[recursive_descent_method]
  fn number_literal(&mut self) -> ParseResult<Expression> {
    match self.token_handler.peek_kind() {
      HexLiteral(_) | BinNumberSigil
=> self.int_literal(), _ => self.float_literal(), } } #[recursive_descent_method] fn int_literal(&mut self) -> ParseResult { use self::ExpressionKind::*; let tok = self.token_handler.next(); match tok.get_kind() { BinNumberSigil => { let digits = self.digits()?; let n = parse_binary(digits, tok)?; Ok(Expression::new(self.id_store.fresh(), NatLiteral(n))) }, HexLiteral(text) => { let digits: String = text.chars().filter(|c| c.is_digit(16)).collect(); let n = parse_hex(digits, tok)?; Ok(Expression::new(self.id_store.fresh(), NatLiteral(n))) }, _ => return ParseError::new_with_token("Expected '0x' or '0b'", tok), } } #[recursive_descent_method] fn float_literal(&mut self) -> ParseResult { use self::ExpressionKind::*; let tok = self.token_handler.peek(); let mut digits = self.digits()?; if let Period = self.token_handler.peek_kind() { self.token_handler.next(); digits.push('.'); digits.push_str(&self.digits()?); match digits.parse::() { Ok(f) => Ok(Expression::new(self.id_store.fresh(), FloatLiteral(f))), Err(e) => ParseError::new_with_token(format!("Float failed to parse with error: {}", e), tok), } } else { match digits.parse::() { Ok(d) => Ok(Expression::new(self.id_store.fresh(), NatLiteral(d))), Err(e) => ParseError::new_with_token(format!("Integer failed to parse with error: {}", e), tok), } } } #[recursive_descent_method] fn digits(&mut self) -> ParseResult { let mut ds = String::new(); loop { match self.token_handler.peek_kind() { Underscore => { self.token_handler.next(); continue; }, DigitGroup(ref s) => { self.token_handler.next(); ds.push_str(s)}, _ => break, } } Ok(ds) } #[recursive_descent_method] fn import_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Import)); let mut path_components = vec![self.identifier()?]; while let (Colon, Colon) = (self.token_handler.peek_kind(), self.token_handler.peek_kind_n(1)) { self.token_handler.next(); self.token_handler.next(); if let Identifier(_) = self.token_handler.peek_kind() { 
path_components.push(self.identifier()?); } else { break; } } let imported_names = match self.token_handler.peek_kind() { LCurlyBrace => { let names = delimited!(self, LCurlyBrace, identifier, Comma, RCurlyBrace); ImportedNames::List(names) }, Operator(ref s) if **s == "*" => { self.token_handler.next(); ImportedNames::All }, _ => ImportedNames::LastOfPath }; Ok(ImportSpecifier { id: self.id_store.fresh(), path_components, imported_names }) } #[recursive_descent_method] fn import_suffix(&mut self) -> ParseResult { Ok(match self.token_handler.peek_kind() { Operator(ref s) if **s == "*" => { self.token_handler.next(); ImportedNames::All }, LCurlyBrace => { let names = delimited!(self, LCurlyBrace, identifier, Comma, RCurlyBrace); ImportedNames::List(names) }, _ => return ParseError::new_with_token("Expected '{{' or '*'", self.token_handler.peek()), }) } #[recursive_descent_method] fn module_declaration(&mut self) -> ParseResult { expect!(self, Keyword(Kw::Module)); let name = self.identifier()?; let contents = delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict); Ok(ModuleSpecifier { name, contents: contents.into() }) } } fn parse_binary(digits: String, tok: Token) -> ParseResult { let mut result: u64 = 0; let mut multiplier = 1; for d in digits.chars().rev() { match d { '1' => result += multiplier, '0' => (), _ => return ParseError::new_with_token("Encountered a character not '1' or '0 while parsing a binary literal", tok), } multiplier = match multiplier.checked_mul(2) { Some(m) => m, None => return ParseError::new_with_token("This binary expression will overflow", tok) } } Ok(result) } fn parse_hex(digits: String, tok: Token) -> ParseResult { let mut result: u64 = 0; let mut multiplier: u64 = 1; for d in digits.chars().rev() { match d.to_digit(16) { Some(n) => result += n as u64 * multiplier, None => return ParseError::new_with_token("Encountered a non-hex digit in a hex literal", tok), } multiplier = match 
multiplier.checked_mul(16) { Some(m) => m, None => return ParseError::new_with_token("This hex expression will overflow", tok) } } Ok(result) }