use std::rc::Rc; use crate::ast::*; fn rc_string(s: &str) -> Rc { Rc::new(s.to_string()) } peg::parser! { pub grammar schala_parser() for str { rule whitespace() = [' ' | '\t' ]* rule whitespace_or_newline() = [' ' | '\t' | '\n' ]* rule _ = quiet!{ whitespace() } rule __ = quiet!{ whitespace_or_newline() } pub rule program() -> AST = __ statements:(statement() ** delimiter() ) __ { AST { id: Default::default(), statements: statements.into() } } rule delimiter() = (";" / "\n")+ //Note - this is a hack, ideally the rule `rule block() -> Block = "{" _ items:(statement() ** //delimiter()) _ "}" { items.into() }` would've worked, but it doesn't. pub rule block() -> Block = "{" __ items:block_item()* __ "}" { items.into() } / "{" __ stmt:statement() __ "}" { vec![stmt].into() } rule block_item() -> Statement = stmt:statement() delimiter()+ { stmt } rule statement() -> Statement = kind:statement_kind() { Statement { id: Default::default(), location: Default::default(), kind } } rule statement_kind() -> StatementKind = __ import:import() { StatementKind::Import(import) } / __ decl:declaration() { StatementKind::Declaration(decl) } / __ expr:expression() { StatementKind::Expression(expr) } rule import() -> ImportSpecifier = "import" _ path_components:path_components() suffix:import_suffix()? { ImportSpecifier { id: Default::default(), path_components, imported_names: suffix.unwrap_or_else(|| ImportedNames::LastOfPath) } } rule path_components() -> Vec> = "::"? name:identifier() rest:path_component()* { let mut items = vec![rc_string(name)]; items.extend(rest.into_iter().map(|n| rc_string(n))); items } rule path_component() -> &'input str = "::" ident:identifier() { ident } rule import_suffix() -> ImportedNames = "::*" { ImportedNames::All } / "::{" __ names:(identifier() ** (_ "," _)) __ "}" { ImportedNames::List(names.into_iter().map(rc_string).collect()) } rule declaration() -> Declaration = binding() / type_decl() / annotation() / func() / interface() / implementation() / module() rule module() -> Declaration = "module" _ name:identifier() _ items:block() { Declaration::Module { name: rc_string(name), items } } rule implementation() -> Declaration = "impl" _ interface:type_singleton_name() _ "for" _ type_name:type_identifier() _ block:decl_block() { Declaration::Impl { type_name, interface_name: Some(interface), block } } / "impl" _ type_name:type_identifier() _ block:decl_block() { Declaration::Impl { type_name, interface_name: None, block } } rule decl_block() -> Vec = "{" __ decls:(func_declaration() ** (delimiter()+)) __ "}" { decls } rule interface() -> Declaration = "interface" _ name:identifier() _ signatures:signature_block() { Declaration::Interface { name: rc_string(name), signatures } } rule signature_block() -> Vec = "{" __ signatures:(func_signature() ** (delimiter()+)) __ "}" { signatures } rule func() -> Declaration = decl:func_declaration() { decl } / sig:func_signature() { Declaration::FuncSig(sig) } rule func_declaration() -> Declaration = _ sig:func_signature() __ body:block() { Declaration::FuncDecl(sig, body) } //TODO handle operators rule func_signature() -> Signature = _ "fn" _ name:identifier() "(" _ params:formal_params() _ ")" _ type_anno:type_anno()? { Signature { name: rc_string(name), operator: false, params, type_anno } } rule formal_params() -> Vec = params:(formal_param() ** (_ "," _)) {? if params.len() < 256 { Ok(params) } else { Err("function-too-long") } } rule formal_param() -> FormalParam = name:identifier() _ anno:type_anno()? _ "=" expr:expression() { FormalParam { name: rc_string(name), default: Some(expr), anno } } / name:identifier() _ anno:type_anno()? { FormalParam { name: rc_string(name), default: None, anno } } rule annotation() -> Declaration = "@" name:identifier() args:annotation_args()? delimiter()+ _ inner:statement() { Declaration::Annotation { name: rc_string(name), arguments: if let Some(args) = args { args } else { vec![] }, inner: Box::new(inner) } } rule annotation_args() -> Vec = "(" _ args:(expression() ** (_ "," _)) _ ")" { args } rule binding() -> Declaration = "let" _ mutable:"mut"? _ ident:identifier() _ type_anno:type_anno()? _ "=" _ expr:expression() { Declaration::Binding { name: Rc::new(ident.to_string()), constant: mutable.is_none(), type_anno, expr } } rule type_decl() -> Declaration = "type" _ "alias" _ alias:type_alias() { alias } / "type" _ mutable:"mut"? _ name:type_singleton_name() _ "=" _ body:type_body() { Declaration::TypeDecl { name, body, mutable: mutable.is_some() } } rule type_singleton_name() -> TypeSingletonName = name:identifier() params:type_params()? { TypeSingletonName { name: rc_string(name), params: if let Some(params) = params { params } else { vec![] } } } rule type_params() -> Vec = "<" _ idents:(type_identifier() ** (_ "," _)) _ ">" { idents } rule type_identifier() -> TypeIdentifier = "(" _ items:(type_identifier() ** (_ "," _)) _ ")" { TypeIdentifier::Tuple(items) } / singleton:type_singleton_name() { TypeIdentifier::Singleton(singleton) } rule type_body() -> TypeBody = "{" _ items:(record_variant_item() ++ (_ "," _)) _ "}" { TypeBody::ImmediateRecord(Default::default(), items) } / variants:(variant_spec() ** (_ "|" _)) { TypeBody::Variants(variants) } rule variant_spec() -> Variant = name:identifier() _ "{" _ typed_identifier_list:(record_variant_item() ++ (_ "," _)) _ "}" { Variant { id: Default::default(), name: rc_string(name), kind: VariantKind::Record(typed_identifier_list) } } / name:identifier() "(" tuple_members:(type_identifier() ++ (_ "," _)) ")" { Variant { id: Default::default(), name: rc_string(name), kind: VariantKind::TupleStruct(tuple_members) } } / name:identifier() { Variant { id: Default::default(), name: rc_string(name), kind: VariantKind::UnitStruct } } rule record_variant_item() -> (Rc, TypeIdentifier) = name:identifier() _ ":" _ ty:type_identifier() { (rc_string(name), ty) } rule type_alias() -> Declaration = alias:identifier() _ "=" _ name:identifier() { Declaration::TypeAlias { alias: rc_string(alias), original: rc_string(name), } } rule type_anno() -> TypeIdentifier = ":" _ identifier:type_identifier() { identifier } pub rule expression() -> Expression = __ kind:expression_kind() _ type_anno:type_anno()? { Expression { id: Default::default(), type_anno, kind } } rule expression_no_struct() -> Expression = __ kind:expression_kind_no_struct() { Expression { id: Default::default(), type_anno: None, kind: kind } } rule expression_kind() -> ExpressionKind = precedence_expr(true) rule expression_kind_no_struct() -> ExpressionKind = precedence_expr(false) rule precedence_expr(struct_ok: bool) -> ExpressionKind = first:prefix_expr(struct_ok) _ next:(precedence_continuation(struct_ok))* { let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect(); BinopSequence { first, next }.do_precedence() } rule precedence_continuation(struct_ok: bool) -> (&'input str, ExpressionKind) = op:operator() _ expr:prefix_expr(struct_ok) _ { (op, expr) } rule prefix_expr(struct_ok: bool) -> ExpressionKind = prefix:prefix()? expr:extended_expr(struct_ok) { if let Some(p) = prefix { let expr = Expression::new(Default::default(), expr); let prefix = PrefixOp::from_sigil(p); ExpressionKind::PrefixExp(prefix, Box::new(expr)) } else { expr } } rule prefix() -> &'input str = $(['+' | '-' | '!' ]) //TODO make the definition of operators more complex rule operator() -> &'input str = quiet!{$( ['+' | '-' | '*' | '/' | '%' | '<' | '>' | '=' | '!' | '$' | '&' | '|' | '?' | '^' | '`']+ )} / expected!("operator") rule extended_expr(struct_ok: bool) -> ExpressionKind = item:extended_expr_ok_struct() {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } / item:extended_expr_no_struct() {? if !struct_ok { Ok(item) } else { Err("!no-struct-allowed") } } #[cache_left_rec] rule extended_expr_ok_struct() -> ExpressionKind = indexee:extended_expr_ok_struct() indexers:index_part() { ExpressionKind::Index { indexee: Box::new(Expression::new(Default::default(), indexee)), indexers, } } / f:extended_expr_ok_struct() arguments:call_part() { ExpressionKind::Call { f: Box::new(Expression::new(Default::default(), f)), arguments, } } / expr:extended_expr_ok_struct() "." name:identifier() { ExpressionKind::Access { name: Rc::new(name.to_string()), expr: Box::new(Expression::new(Default::default(),expr)), } } / primary(true) #[cache_left_rec] rule extended_expr_no_struct() -> ExpressionKind = indexee:extended_expr_no_struct() indexers:index_part() { ExpressionKind::Index { indexee: Box::new(Expression::new(Default::default(), indexee)), indexers, } } / f:extended_expr_no_struct() arguments:call_part() { ExpressionKind::Call { f: Box::new(Expression::new(Default::default(), f)), arguments, } } / expr:extended_expr_no_struct() "." name:identifier() { ExpressionKind::Access { name: Rc::new(name.to_string()), expr: Box::new(Expression::new(Default::default(),expr)), } } / primary(false) rule index_part() -> Vec = "[" indexers:(expression() ++ ",") "]" { indexers } rule call_part() -> Vec = "(" arguments:(invocation_argument() ** ",") ")" { arguments } //TODO this shouldn't be an expression b/c type annotations disallowed here rule invocation_argument() -> InvocationArgument = _ "_" _ { InvocationArgument::Ignored } / _ ident:identifier() _ "=" _ expr:expression() { InvocationArgument::Keyword { name: Rc::new(ident.to_string()), expr } } / _ expr:expression() _ { InvocationArgument::Positional(expr) } rule primary(struct_ok: bool) -> ExpressionKind = while_expr() / for_expr() / float_literal() / nat_literal() / bool_literal() / string_literal() / paren_expr() / list_expr() / if_expr() / lambda_expr() / item:named_struct() {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } / identifier_expr() rule lambda_expr() -> ExpressionKind = r#"\"# __ "(" _ params:formal_params() _ ")" _ type_anno:(type_anno()?) _ body:block() { ExpressionKind::Lambda { params, type_anno, body } } / r#"\"# param:formal_param() _ type_anno:(type_anno()?) _ body:block() { ExpressionKind::Lambda { params: vec![param], type_anno, body } } rule for_expr() -> ExpressionKind = "for" _ enumerators:for_enumerators() _ body:for_body() { ExpressionKind::ForExpression { enumerators, body } } rule for_enumerators() -> Vec = "{" _ enumerators:(enumerator() ++ ",") _ "}" { enumerators } / enumerator:enumerator() { vec![enumerator] } //TODO add guards, etc. rule enumerator() -> Enumerator = ident:identifier() _ "<-" _ generator:expression_no_struct() { Enumerator { id: Rc::new(ident.to_string()), generator } } / //TODO need to distinguish these two cases in AST ident:identifier() _ "=" _ generator:expression_no_struct() { Enumerator { id: Rc::new(ident.to_string()), generator } } rule for_body() -> Box = "return" _ expr:expression() { Box::new(ForBody::MonadicReturn(expr)) } / body:block() { Box::new(ForBody::StatementBlock(body)) } rule while_expr() -> ExpressionKind = "while" _ cond:expression_kind_no_struct()? _ body:block() { ExpressionKind::WhileExpression { condition: cond.map(|kind| Box::new(Expression::new(Default::default(), kind))), body, } } rule identifier_expr() -> ExpressionKind = qn:qualified_identifier() { ExpressionKind::Value(qn) } rule named_struct() -> ExpressionKind = name:qualified_identifier() _ fields:record_block() { ExpressionKind::NamedStruct { name, fields: fields.into_iter().map(|(n, exp)| (Rc::new(n.to_string()), exp)).collect(), } } //TODO anonymous structs, update syntax for structs rule record_block() -> Vec<(&'input str, Expression)> = "{" _ entries:(record_entry() ** ",") _ "}" { entries } rule record_entry() -> (&'input str, Expression) = _ name:identifier() _ ":" _ expr:expression() _ { (name, expr) } rule qualified_identifier() -> QualifiedName = names:(identifier() ++ "::") { QualifiedName { id: Default::default(), components: names.into_iter().map(|name| Rc::new(name.to_string())).collect() } } //TODO improve the definition of identifiers rule identifier() -> &'input str = $(['a'..='z' | 'A'..='Z' | '_'] ['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*) rule if_expr() -> ExpressionKind = "if" _ discriminator:(expression()?) _ body:if_expr_body() { ExpressionKind::IfExpression { discriminator: discriminator.map(Box::new), body: Box::new(body), } } rule if_expr_body() -> IfExpressionBody = cond_block() / simple_pattern_match() / simple_conditional() rule simple_conditional() -> IfExpressionBody = "then" _ then_case:expr_or_block() _ else_case:else_case() { IfExpressionBody::SimpleConditional { then_case, else_case } } rule simple_pattern_match() -> IfExpressionBody = "is" _ pattern:pattern() _ "then" _ then_case:expr_or_block() _ else_case:else_case() { IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case } } rule cond_block() -> IfExpressionBody = "{" __ cond_arms:(cond_arm() ++ (delimiter()+)) __ "}" { IfExpressionBody::CondList(cond_arms) } rule cond_arm() -> ConditionArm = _ "else" _ body:expr_or_block() { ConditionArm { condition: Condition::Else, guard: None, body } } / _ condition:condition() _ guard:condition_guard() _ "then" _ body:expr_or_block() { ConditionArm { condition, guard, body } } rule condition() -> Condition = "is" _ pat:pattern() { Condition::Pattern(pat) } / op:operator() _ expr:expression() { Condition::TruncatedOp(BinOp::from_sigil(op), expr) } rule condition_guard() -> Option = ("if" _ expr:expression() { expr } )? rule expr_or_block() -> Block = block() / ex:expression() { Statement { id: Default::default(), location: Default::default(), kind: StatementKind::Expression(ex) }.into() } rule else_case() -> Option = ("else" _ eorb:expr_or_block() { eorb })? rule pattern() -> Pattern = "(" _ variants:(pattern() ++ ",") _ ")" { Pattern::TuplePattern(variants) } / _ pat:simple_pattern() { pat } rule simple_pattern() -> Pattern = pattern_literal() / qn:qualified_identifier() "(" members:(pattern() ** ",") ")" { Pattern::TupleStruct(qn, members) } / qn:qualified_identifier() _ "{" _ items:(record_pattern_entry() ** ",") "}" _ { let items = items.into_iter().map(|(name, pat)| (Rc::new(name.to_string()), pat)).collect(); Pattern::Record(qn, items) } / qn:qualified_identifier() { Pattern::VarOrName(qn) } rule record_pattern_entry() -> (&'input str, Pattern) = _ name:identifier() _ ":" _ pat:pattern() _ { (name, pat) } / _ name:identifier() _ { let qn = QualifiedName { id: Default::default(), components: vec![Rc::new(name.to_string())], }; (name, Pattern::VarOrName(qn)) } rule pattern_literal() -> Pattern = "true" { Pattern::Literal(PatternLiteral::BoolPattern(true)) } / "false" { Pattern::Literal(PatternLiteral::BoolPattern(false)) } / s:bare_string_literal() { Pattern::Literal(PatternLiteral::StringPattern(Rc::new(s.to_string()))) } / sign:("-"?) num:(float_literal() / nat_literal()) { let neg = sign.is_some(); Pattern::Literal(PatternLiteral::NumPattern { neg, num }) } / "_" { Pattern::Ignored } rule list_expr() -> ExpressionKind = "[" exprs:(expression() ** ",") "]" { let mut exprs = exprs; ExpressionKind::ListLiteral(exprs) } rule paren_expr() -> ExpressionKind = "(" exprs:(expression() ** ",") ")" { let mut exprs = exprs; match exprs.len() { 1 => exprs.pop().unwrap().kind, _ => ExpressionKind::TupleLiteral(exprs), } } rule string_literal() -> ExpressionKind = s:bare_string_literal(){ ExpressionKind::StringLiteral(Rc::new(s.to_string())) } //TODO string escapes, prefixes rule bare_string_literal() -> &'input str = "\"" items:$([^ '"' ]*) "\"" { items } rule bool_literal() -> ExpressionKind = "true" { ExpressionKind::BoolLiteral(true) } / "false" { ExpressionKind::BoolLiteral(false) } rule nat_literal() -> ExpressionKind = bin_literal() / hex_literal() / unmarked_literal() rule unmarked_literal() -> ExpressionKind = digits:digits() { ExpressionKind::NatLiteral(digits.parse().unwrap()) } rule bin_literal() -> ExpressionKind = "0b" digits:bin_digits() { ExpressionKind::NatLiteral(parse_binary(digits)) } rule hex_literal() -> ExpressionKind = "0x" digits:hex_digits() { ExpressionKind::NatLiteral(parse_hex(digits)) } rule float_literal() -> ExpressionKind = ds:$( digits() "." digits()? / "." digits() ) { ExpressionKind::FloatLiteral(ds.parse().unwrap()) } rule digits() -> &'input str = $((digit_group() "_"*)+) rule bin_digits() -> &'input str = $((bin_digit_group() "_"*)+) rule hex_digits() -> &'input str = $((hex_digit_group() "_"*)+) rule digit_group() -> &'input str = $(['0'..='9']+) rule bin_digit_group() -> &'input str = $(['0' | '1']+) rule hex_digit_group() -> &'input str = $(['0'..='9' | 'a'..='f' | 'A'..='F']+) } } fn parse_binary(digits: &str /*, tok: Token*/) -> u64 { let mut result: u64 = 0; let mut multiplier = 1; for d in digits.chars().rev() { match d { '1' => result += multiplier, '0' => (), '_' => continue, _ => unreachable!(), } multiplier = match multiplier.checked_mul(2) { Some(m) => m, None => /*return ParseError::new_with_token("This binary expression will overflow", tok),*/ panic!(), } } //Ok(result) result } //TODO fix these two functions fn parse_hex(digits: &str) -> u64 { let mut result: u64 = 0; let mut multiplier: u64 = 1; for d in digits.chars().rev() { if d == '_' { continue; } match d.to_digit(16) { Some(n) => result += n as u64 * multiplier, None => panic!(), } multiplier = match multiplier.checked_mul(16) { Some(m) => m, None => panic!(), } } result } #[derive(Debug)] struct BinopSequence { first: ExpressionKind, next: Vec<(BinOp, ExpressionKind)>, } impl BinopSequence { fn do_precedence(self) -> ExpressionKind { fn helper( precedence: i32, lhs: ExpressionKind, rest: &mut Vec<(BinOp, ExpressionKind)>, ) -> Expression { let mut lhs = Expression::new(Default::default(), lhs); loop { let (next_op, next_rhs) = match rest.pop() { Some((a, b)) => (a, b), None => break, }; let new_precedence = next_op.get_precedence(); if precedence >= new_precedence { rest.push((next_op, next_rhs)); break; } let rhs = helper(new_precedence, next_rhs, rest); lhs = Expression::new( Default::default(), ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)), ); } lhs } let mut as_stack = self.next.into_iter().rev().collect(); helper(BinOp::min_precedence(), self.first, &mut as_stack).kind } }