use std::rc::Rc; use crate::ast::*; peg::parser! { pub grammar schala_parser() for str { rule whitespace() = [' ' | '\t' | '\n']* rule _ = quiet!{ whitespace() } pub rule program() -> AST = n:(statement() ** delimiter() ) { AST { id: Default::default(), statements: n.into() } } rule delimiter() = ";" / "\n" rule statement() -> Statement = _ expr:expression() { Statement { id: Default::default(), location: Default::default(), kind: StatementKind::Expression(expr) } } rule block() -> Block = "{" _ items:(statement() ** delimiter()) _ "}" { items.into() } pub rule expression() -> Expression = _ kind:expression_kind() { Expression { id: Default::default(), type_anno: None, kind: kind } } rule expression_kind() -> ExpressionKind = precedence_expr() rule expression_kind_no_struct() -> ExpressionKind = precedence_expr_no_struct() rule precedence_expr() -> ExpressionKind = first:prefix_expr() _ next:(precedence_continuation())* { let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect(); BinopSequence { first, next }.do_precedence() } rule precedence_continuation() -> (&'input str, ExpressionKind) = op:operator() _ expr:prefix_expr() _ { (op, expr) } rule precedence_expr_no_struct() -> ExpressionKind = first:prefix_expr() _ next:(precedence_continuation_no_struct())* { let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect(); BinopSequence { first, next }.do_precedence() } rule precedence_continuation_no_struct() -> (&'input str, ExpressionKind) = op:operator() _ expr:prefix_expr_no_struct() _ { (op, expr) } rule prefix_expr() -> ExpressionKind = prefix:prefix()? expr:extended_expr() { if let Some(p) = prefix { let expr = Expression::new(Default::default(), expr); let prefix = PrefixOp::from_sigil(p); ExpressionKind::PrefixExp(prefix, Box::new(expr)) } else { expr } } rule prefix_expr_no_struct() -> ExpressionKind = prefix:prefix()? expr:extended_expr_no_struct() { if let Some(p) = prefix { let expr = Expression::new(Default::default(), expr); let prefix = PrefixOp::from_sigil(p); ExpressionKind::PrefixExp(prefix, Box::new(expr)) } else { expr } } rule prefix() -> &'input str = $(['+' | '-' | '!' ]) //TODO make the definition of operators more complex rule operator() -> &'input str = quiet!{$( ['+' | '-' | '*' | '/' | '%' | '<' | '>' | '=' | '!' | '$' | '&' | '|' | '?' | '^' | '`']+ )} / expected!("operator") #[cache_left_rec] rule extended_expr() -> ExpressionKind = indexee:extended_expr() indexers:index_part() { ExpressionKind::Index { indexee: Box::new(Expression::new(Default::default(), indexee)), indexers, } } / f:extended_expr() arguments:call_part() { ExpressionKind::Call { f: Box::new(Expression::new(Default::default(), f)), arguments, } } / expr:extended_expr() "." name:identifier() { ExpressionKind::Access { name: Rc::new(name.to_string()), expr: Box::new(Expression::new(Default::default(),expr)), } } / primary() #[cache_left_rec] rule extended_expr_no_struct() -> ExpressionKind = indexee:extended_expr() indexers:index_part() { ExpressionKind::Index { indexee: Box::new(Expression::new(Default::default(), indexee)), indexers, } } / f:extended_expr() arguments:call_part() { ExpressionKind::Call { f: Box::new(Expression::new(Default::default(), f)), arguments, } } / expr:extended_expr() "." name:identifier() { ExpressionKind::Access { name: Rc::new(name.to_string()), expr: Box::new(Expression::new(Default::default(),expr)), } } / primary_no_struct() rule index_part() -> Vec = "[" indexers:(expression() ++ ",") "]" { indexers } rule call_part() -> Vec = "(" arguments:(invocation_argument() ** ",") ")" { arguments } //TODO this shouldn't be an expression b/c type annotations disallowed here rule invocation_argument() -> InvocationArgument = _ "_" _ { InvocationArgument::Ignored } / _ ident:identifier() _ "=" _ expr:expression() { InvocationArgument::Keyword { name: Rc::new(ident.to_string()), expr } } / _ expr:expression() _ { InvocationArgument::Positional(expr) } rule primary_no_struct() -> ExpressionKind = while_expr() / float_literal() / nat_literal() / bool_literal() / string_literal() / paren_expr() / list_expr() / if_expr() / identifier_expr() rule primary() -> ExpressionKind = while_expr() / float_literal() / nat_literal() / bool_literal() / string_literal() / paren_expr() / list_expr() / if_expr() / named_struct() / identifier_expr() rule while_expr() -> ExpressionKind = "while" _ cond:expression_kind_no_struct()? _ body:block() { ExpressionKind::WhileExpression { condition: cond.map(|kind| Box::new(Expression::new(Default::default(), kind))), body, } } rule identifier_expr() -> ExpressionKind = qn:qualified_identifier() { ExpressionKind::Value(qn) } rule named_struct() -> ExpressionKind = name:qualified_identifier() _ fields:record_block() { ExpressionKind::NamedStruct { name, fields: fields.into_iter().map(|(n, exp)| (Rc::new(n.to_string()), exp)).collect(), } } //TODO anonymous structs, update syntax for structs rule record_block() -> Vec<(&'input str, Expression)> = "{" _ entries:(record_entry() ** ",") _ "}" { entries } rule record_entry() -> (&'input str, Expression) = _ name:identifier() _ ":" _ expr:expression() _ { (name, expr) } rule qualified_identifier() -> QualifiedName = names:(identifier() ++ "::") { QualifiedName { id: Default::default(), components: names.into_iter().map(|name| Rc::new(name.to_string())).collect() } } //TODO improve the definition of identifiers rule identifier() -> &'input str = $(['a'..='z' | 'A'..='Z' | '_'] ['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) rule if_expr() -> ExpressionKind = "if" _ discriminator:(expression()?) _ body:if_expr_body() { ExpressionKind::IfExpression { discriminator: discriminator.map(Box::new), body: Box::new(body), } } rule if_expr_body() -> IfExpressionBody = cond_block() / simple_pattern_match() / simple_conditional() rule simple_conditional() -> IfExpressionBody = "then" _ then_case:expr_or_block() _ else_case:else_case() { IfExpressionBody::SimpleConditional { then_case, else_case } } rule simple_pattern_match() -> IfExpressionBody = "is" _ pattern:pattern() _ "then" _ then_case:expr_or_block() _ else_case:else_case() { IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case } } rule cond_block() -> IfExpressionBody = "{" _ cond_arms:(cond_arm() ++ ",") _ "}" { IfExpressionBody::CondList(cond_arms) } rule cond_arm() -> ConditionArm = _ "else" _ body:expr_or_block() { ConditionArm { condition: Condition::Else, guard: None, body } } / _ condition:condition() _ guard:condition_guard() _ "then" _ body:expr_or_block() { ConditionArm { condition, guard, body } } rule condition() -> Condition = "is" _ pat:pattern() { Condition::Pattern(pat) } / op:operator() _ expr:expression() { Condition::TruncatedOp(BinOp::from_sigil(op), expr) } rule condition_guard() -> Option = ("if" _ expr:expression() { expr } )? rule expr_or_block() -> Block = block() / ex:expression() { Statement { id: Default::default(), location: Default::default(), kind: StatementKind::Expression(ex) }.into() } rule else_case() -> Option = ("else" _ eorb:expr_or_block() { eorb })? rule pattern() -> Pattern = "(" _ variants:(pattern() ++ ",") _ ")" { Pattern::TuplePattern(variants) } / _ pat:simple_pattern() { pat } rule simple_pattern() -> Pattern = pattern_literal() / qn:qualified_identifier() "(" members:(pattern() ** ",") ")" { Pattern::TupleStruct(qn, members) } / qn:qualified_identifier() _ "{" _ items:(record_pattern_entry() ** ",") "}" _ { let items = items.into_iter().map(|(name, pat)| (Rc::new(name.to_string()), pat)).collect(); Pattern::Record(qn, items) } / qn:qualified_identifier() { Pattern::VarOrName(qn) } rule record_pattern_entry() -> (&'input str, Pattern) = _ name:identifier() _ ":" _ pat:pattern() _ { (name, pat) } / _ name:identifier() _ { let qn = QualifiedName { id: Default::default(), components: vec![Rc::new(name.to_string())], }; (name, Pattern::VarOrName(qn)) } rule pattern_literal() -> Pattern = "true" { Pattern::Literal(PatternLiteral::BoolPattern(true)) } / "false" { Pattern::Literal(PatternLiteral::BoolPattern(false)) } / s:bare_string_literal() { Pattern::Literal(PatternLiteral::StringPattern(Rc::new(s.to_string()))) } / sign:("-"?) num:nat_literal() { let neg = sign.is_some(); Pattern::Literal(PatternLiteral::NumPattern { neg, num }) } / "_" { Pattern::Ignored } rule list_expr() -> ExpressionKind = "[" exprs:(expression() ** ",") "]" { let mut exprs = exprs; ExpressionKind::ListLiteral(exprs) } rule paren_expr() -> ExpressionKind = "(" exprs:(expression() ** ",") ")" { let mut exprs = exprs; match exprs.len() { 1 => exprs.pop().unwrap().kind, _ => ExpressionKind::TupleLiteral(exprs), } } rule string_literal() -> ExpressionKind = s:bare_string_literal(){ ExpressionKind::StringLiteral(Rc::new(s.to_string())) } //TODO string escapes, prefixes rule bare_string_literal() -> &'input str = "\"" items:$([^ '"' ]*) "\"" { items } rule bool_literal() -> ExpressionKind = "true" { ExpressionKind::BoolLiteral(true) } / "false" { ExpressionKind::BoolLiteral(false) } rule nat_literal() -> ExpressionKind = bin_literal() / hex_literal() / unmarked_literal() rule unmarked_literal() -> ExpressionKind = digits:digits() { ExpressionKind::NatLiteral(digits.parse().unwrap()) } rule bin_literal() -> ExpressionKind = "0b" digits:bin_digits() { ExpressionKind::NatLiteral(parse_binary(digits)) } rule hex_literal() -> ExpressionKind = "0x" digits:hex_digits() { ExpressionKind::NatLiteral(parse_hex(digits)) } rule float_literal() -> ExpressionKind = ds:$( digits() "." digits()? / "." digits() ) { ExpressionKind::FloatLiteral(ds.parse().unwrap()) } rule digits() -> &'input str = $((digit_group() "_"*)+) rule bin_digits() -> &'input str = $((bin_digit_group() "_"*)+) rule hex_digits() -> &'input str = $((hex_digit_group() "_"*)+) rule digit_group() -> &'input str = $(['0'..='9']+) rule bin_digit_group() -> &'input str = $(['0' | '1']+) rule hex_digit_group() -> &'input str = $(['0'..='9' | 'a'..='f' | 'A'..='F']+) } } fn parse_binary(digits: &str /*, tok: Token*/) -> u64 { let mut result: u64 = 0; let mut multiplier = 1; for d in digits.chars().rev() { match d { '1' => result += multiplier, '0' => (), '_' => continue, _ => unreachable!(), } multiplier = match multiplier.checked_mul(2) { Some(m) => m, None => /*return ParseError::new_with_token("This binary expression will overflow", tok),*/ panic!(), } } //Ok(result) result } //TODO fix these two functions fn parse_hex(digits: &str) -> u64 { let mut result: u64 = 0; let mut multiplier: u64 = 1; for d in digits.chars().rev() { if d == '_' { continue; } match d.to_digit(16) { Some(n) => result += n as u64 * multiplier, None => panic!(), } multiplier = match multiplier.checked_mul(16) { Some(m) => m, None => panic!(), } } result } #[derive(Debug)] struct BinopSequence { first: ExpressionKind, next: Vec<(BinOp, ExpressionKind)>, } impl BinopSequence { fn do_precedence(self) -> ExpressionKind { fn helper( precedence: i32, lhs: ExpressionKind, rest: &mut Vec<(BinOp, ExpressionKind)>, ) -> Expression { let mut lhs = Expression::new(Default::default(), lhs); loop { let (next_op, next_rhs) = match rest.pop() { Some((a, b)) => (a, b), None => break, }; let new_precedence = next_op.get_precedence(); if precedence >= new_precedence { rest.push((next_op, next_rhs)); break; } let rhs = helper(new_precedence, next_rhs, rest); lhs = Expression::new( Default::default(), ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)), ); } lhs } let mut as_stack = self.next.into_iter().rev().collect(); helper(BinOp::min_precedence(), self.first, &mut as_stack).kind } }