use std::rc::Rc; //TODO make use of the format_parse_error function //use crate::error::{SchalaError, format_parse_error}; use crate::{ ast::*, identifier::{Id, IdStore}, parsing::ParseError, }; fn rc_string(s: &str) -> Rc { Rc::new(s.to_string()) } pub struct Parser { id_store: IdStore, } impl Parser { pub(crate) fn new() -> Self { Self { id_store: IdStore::new() } } pub(crate) fn parse(&mut self, input: &str) -> Result { use peg::str::LineCol; schala_parser::program(input, self).map_err(|err: peg::error::ParseError| { let msg = err.to_string(); ParseError { msg, location: err.location.offset.into() } }) } fn fresh(&mut self) -> Id { self.id_store.fresh() } } enum ExtendedPart<'a> { Index(Vec), Accessor(&'a str), Call(Vec), } peg::parser! { pub grammar schala_parser() for str { rule whitespace() = [' ' | '\t' ] rule whitespace_or_newline() = [' ' | '\t' | '\n' ] rule _ = quiet!{ (block_comment() / line_comment() / whitespace())* } rule __ = quiet!{ (block_comment() / line_comment() / whitespace_or_newline())* } rule block_comment() = "/*" (block_comment() / !"*/" [_])* "*/" rule line_comment() = "//" (!['\n'] [_])* &"\n" pub rule program(parser: &mut Parser) -> AST = __ statements:(statement(parser) ** (delimiter()+) ) __ { AST { id: parser.fresh(), statements: statements.into() } } rule delimiter() = (";" / "\n")+ //Note - this is a hack, ideally the rule `rule block() -> Block = "{" _ items:(statement() ** //delimiter()) _ "}" { items.into() }` would've worked, but it doesn't. pub rule block(parser: &mut Parser) -> Block = "{" __ items:block_item(parser)* __ "}" { items.into() } / "{" __ stmt:statement(parser) __ "}" { vec![stmt].into() } rule block_item(parser: &mut Parser) -> Statement = _ stmt:statement(parser) _ delimiter()+ { stmt } rule statement(parser: &mut Parser) -> Statement = _ pos:position!() kind:statement_kind(parser) _ { Statement { id: parser.fresh(), location: pos.into(), kind } } rule statement_kind(parser: &mut Parser) -> StatementKind = __ import:import(parser) { StatementKind::Import(import) } / __ decl:declaration(parser) { StatementKind::Declaration(decl) } / __ flow:flow(parser) { StatementKind::Flow(flow) } / __ expr:expression(parser) { StatementKind::Expression(expr) } rule flow(parser: &mut Parser) -> FlowControl = "continue" { FlowControl::Continue } / "break" { FlowControl::Break } / "return" _ expr:expression(parser)? { FlowControl::Return(expr) } rule import(parser: &mut Parser) -> ImportSpecifier = "import" _ path_components:path_components() suffix:import_suffix()? { ImportSpecifier { id: parser.fresh(), path_components, imported_names: suffix.unwrap_or(ImportedNames::LastOfPath) } } rule path_components() -> Vec> = "::"? name:identifier() rest:path_component()* { let mut items = vec![rc_string(name)]; items.extend(rest.into_iter().map(rc_string)); items } rule path_component() -> &'input str = "::" ident:identifier() { ident } rule import_suffix() -> ImportedNames = "::*" { ImportedNames::All } / "::{" __ names:(identifier() ** (_ "," _)) __ "}" { ImportedNames::List(names.into_iter().map(rc_string).collect()) } rule declaration(parser: &mut Parser) -> Declaration = binding(parser) / type_decl(parser) / annotation(parser) / func(parser) / interface(parser) / implementation(parser) / module(parser) rule module(parser: &mut Parser) -> Declaration = "module" _ name:identifier() _ items:block(parser) { Declaration::Module { name: rc_string(name), items } } rule implementation(parser: &mut Parser) -> Declaration = "impl" _ interface:type_singleton_name() _ "for" _ type_name:type_identifier() _ block:decl_block(parser) { Declaration::Impl { type_name, interface_name: Some(interface), block } } / "impl" _ type_name:type_identifier() _ block:decl_block(parser) { Declaration::Impl { type_name, interface_name: None, block } } rule decl_block(parser: &mut Parser) -> Vec = "{" __ decls:(func_declaration(parser) ** (delimiter()+)) __ "}" { decls } rule interface(parser: &mut Parser) -> Declaration = "interface" _ name:identifier() _ signatures:signature_block(parser) { Declaration::Interface { name: rc_string(name), signatures } } rule signature_block(parser: &mut Parser) -> Vec = "{" __ signatures:(func_signature(parser) ** (delimiter()+)) __ "}" { signatures } rule func(parser: &mut Parser) -> Declaration = decl:func_declaration(parser) { decl } / sig:func_signature(parser) { Declaration::FuncSig(sig) } rule func_declaration(parser: &mut Parser) -> Declaration = _ sig:func_signature(parser) __ body:block(parser) { Declaration::FuncDecl(sig, body) } //TODO handle operators rule func_signature(parser: &mut Parser) -> Signature = _ "fn" _ name:identifier() "(" _ params:formal_params(parser) _ ")" _ type_anno:type_anno()? { Signature { name: rc_string(name), operator: false, params, type_anno } } rule formal_params(parser: &mut Parser) -> Vec = params:(formal_param(parser) ** (_ "," _)) {? if params.len() < 256 { Ok(params) } else { Err("function-too-long") } } rule formal_param(parser: &mut Parser) -> FormalParam = name:identifier() _ anno:type_anno()? _ "=" expr:expression(parser) { FormalParam { name: rc_string(name), default: Some(expr), anno } } / name:identifier() _ anno:type_anno()? { FormalParam { name: rc_string(name), default: None, anno } } rule annotation(parser: &mut Parser) -> Declaration = "@" name:identifier() args:annotation_args(parser)? delimiter()+ _ inner:statement(parser) { Declaration::Annotation { name: rc_string(name), arguments: if let Some(args) = args { args } else { vec![] }, inner: Box::new(inner) } } rule annotation_args(parser: &mut Parser) -> Vec = "(" _ args:(expression(parser) ** (_ "," _)) _ ")" { args } rule binding(parser: &mut Parser) -> Declaration = "let" _ mutable:"mut"? _ ident:identifier() _ type_anno:type_anno()? _ "=" _ expr:expression(parser) { Declaration::Binding { name: Rc::new(ident.to_string()), constant: mutable.is_none(), type_anno, expr } } rule type_decl(parser: &mut Parser) -> Declaration = "type" _ "alias" _ alias:type_alias() { alias } / "type" _ mutable:"mut"? _ name:type_singleton_name() _ "=" _ body:type_body(parser) { Declaration::TypeDecl { name, body, mutable: mutable.is_some() } } rule type_singleton_name() -> TypeSingletonName = name:identifier() params:type_params()? { TypeSingletonName { name: rc_string(name), params: if let Some(params) = params { params } else { vec![] } } } rule type_params() -> Vec = "<" _ idents:(type_identifier() ** (_ "," _)) _ ">" { idents } rule type_identifier() -> TypeIdentifier = "(" _ items:(type_identifier() ** (_ "," _)) _ ")" { TypeIdentifier::Tuple(items) } / singleton:type_singleton_name() { TypeIdentifier::Singleton(singleton) } rule type_body(parser: &mut Parser) -> TypeBody = "{" _ items:(record_variant_item() ** (__ "," __)) __ "}" { TypeBody::ImmediateRecord(parser.fresh(), items) } / variants:(variant_spec(parser) ** (__ "|" __)) { TypeBody::Variants(variants) } rule variant_spec(parser: &mut Parser) -> Variant = name:identifier() __ "{" __ typed_identifier_list:(record_variant_item() ** (__ "," __)) __ ","? __ "}" { Variant { id: parser.fresh(), name: rc_string(name), kind: VariantKind::Record(typed_identifier_list) } } / name:identifier() "(" tuple_members:(type_identifier() ++ (__ "," __)) ")" { Variant { id: parser.fresh(), name: rc_string(name), kind: VariantKind::TupleStruct(tuple_members) } } / name:identifier() { Variant { id: parser.fresh(), name: rc_string(name), kind: VariantKind::UnitStruct } } rule record_variant_item() -> (Rc, TypeIdentifier) = name:identifier() _ ":" _ ty:type_identifier() { (rc_string(name), ty) } rule type_alias() -> Declaration = alias:identifier() _ "=" _ name:identifier() { Declaration::TypeAlias { alias: rc_string(alias), original: rc_string(name), } } rule type_anno() -> TypeIdentifier = ":" _ identifier:type_identifier() { identifier } pub rule expression(parser: &mut Parser) -> Expression = __ kind:expression_kind(true, parser) _ type_anno:type_anno()? { Expression { id: parser.fresh(), type_anno, kind } } rule expression_no_struct(parser: &mut Parser) -> Expression = __ kind:expression_kind(false, parser) { Expression { id: parser.fresh(), type_anno: None, kind } } rule expression_kind(struct_ok: bool, parser: &mut Parser) -> ExpressionKind = precedence_expr(struct_ok, parser) rule precedence_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind = first:prefix_expr(struct_ok, parser) _ next:(precedence_continuation(struct_ok, parser))* { let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect(); BinopSequence { first, next }.do_precedence(parser) } rule precedence_continuation(struct_ok: bool, parser: &mut Parser) -> (&'input str, ExpressionKind) = op:operator() _ expr:prefix_expr(struct_ok, parser) _ { (op, expr) } rule prefix_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind = prefix:prefix()? expr:extended_expr(struct_ok, parser) { if let Some(p) = prefix { let expr = Expression::new(parser.fresh(), expr); let prefix = PrefixOp::from_sigil(p); ExpressionKind::PrefixExp(prefix, Box::new(expr)) } else { expr } } rule prefix() -> &'input str = $(['+' | '-' | '!' ]) //TODO make the definition of operators more complex rule operator() -> &'input str = quiet!{!"*/" s:$( ['+' | '-' | '*' | '/' | '%' | '<' | '>' | '=' | '!' | '$' | '&' | '|' | '?' | '^' | '`']+ ) { s } } / expected!("operator") rule extended_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind = primary:primary(struct_ok, parser) parts:(extended_expr_part(parser)*) { let mut expression = Expression::new(parser.fresh(), primary); for part in parts.into_iter() { let kind = match part { ExtendedPart::Index(indexers) => { ExpressionKind::Index { indexee: Box::new(expression), indexers } }, ExtendedPart::Accessor(name) => { let name = rc_string(name); ExpressionKind::Access { name, expr: Box::new(expression) } }, ExtendedPart::Call(arguments) => { ExpressionKind::Call { f: Box::new(expression), arguments } } }; expression = Expression::new(parser.fresh(), kind); } expression.kind } rule extended_expr_part(parser: &mut Parser) -> ExtendedPart<'input> = indexers:index_part(parser) { ExtendedPart::Index(indexers) } / arguments:call_part(parser) { ExtendedPart::Call(arguments) } / "." name:identifier() { ExtendedPart::Accessor(name) } rule index_part(parser: &mut Parser) -> Vec = "[" indexers:(expression(parser) ++ ",") "]" { indexers } rule call_part(parser: &mut Parser) -> Vec = "(" arguments:(invocation_argument(parser) ** ",") ")" { arguments } //TODO this shouldn't be an expression b/c type annotations disallowed here rule invocation_argument(parser: &mut Parser) -> InvocationArgument = _ "_" _ { InvocationArgument::Ignored } / _ ident:identifier() _ "=" _ expr:expression(parser) { InvocationArgument::Keyword { name: Rc::new(ident.to_string()), expr } } / _ expr:expression(parser) _ { InvocationArgument::Positional(expr) } rule primary(struct_ok: bool, parser: &mut Parser) -> ExpressionKind = while_expr(parser) / for_expr(parser) / float_literal() / nat_literal() / bool_literal() / string_literal() / paren_expr(parser) / list_expr(parser) / if_expr(parser) / lambda_expr(parser) / item:named_struct(parser) {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } / identifier_expr(parser) rule lambda_expr(parser: &mut Parser) -> ExpressionKind = r#"\"# __ "(" _ params:formal_params(parser) _ ")" _ type_anno:(type_anno()?) _ body:block(parser) { ExpressionKind::Lambda { params, type_anno, body } } / r#"\"# param:formal_param(parser) _ type_anno:(type_anno()?) _ body:block(parser) { ExpressionKind::Lambda { params: vec![param], type_anno, body } } rule for_expr(parser: &mut Parser) -> ExpressionKind = "for" _ enumerators:for_enumerators(parser) _ body:for_body(parser) { ExpressionKind::ForExpression { enumerators, body } } rule for_enumerators(parser: &mut Parser) -> Vec = "{" _ enumerators:(enumerator(parser) ++ ",") _ "}" { enumerators } / enumerator:enumerator(parser) { vec![enumerator] } //TODO add guards, etc. rule enumerator(parser: &mut Parser) -> Enumerator = ident:identifier() _ "<-" _ generator:expression_no_struct(parser) { Enumerator { id: Rc::new(ident.to_string()), generator } } / //TODO need to distinguish these two cases in AST ident:identifier() _ "=" _ generator:expression_no_struct(parser) { Enumerator { id: Rc::new(ident.to_string()), generator } } rule for_body(parser: &mut Parser) -> Box = "return" _ expr:expression(parser) { Box::new(ForBody::MonadicReturn(expr)) } / body:block(parser) { Box::new(ForBody::StatementBlock(body)) } rule while_expr(parser: &mut Parser) -> ExpressionKind = "while" _ cond:expression_kind(false, parser)? _ body:block(parser) { ExpressionKind::WhileExpression { condition: cond.map(|kind| Box::new(Expression::new(parser.fresh(), kind))), body, } } rule identifier_expr(parser: &mut Parser) -> ExpressionKind = qn:qualified_identifier(parser) { ExpressionKind::Value(qn) } rule named_struct(parser: &mut Parser) -> ExpressionKind = name:qualified_identifier(parser) _ fields:record_block(parser) { ExpressionKind::NamedStruct { name, fields: fields.into_iter().map(|(n, exp)| (Rc::new(n.to_string()), exp)).collect(), } } //TODO anonymous structs, update syntax for structs rule record_block(parser: &mut Parser) -> Vec<(&'input str, Expression)> = "{" _ entries:(record_entry(parser) ** ",") _ "}" { entries } rule record_entry(parser: &mut Parser) -> (&'input str, Expression) = _ name:identifier() _ ":" _ expr:expression(parser) _ { (name, expr) } rule qualified_identifier(parser: &mut Parser) -> QualifiedName = names:(identifier() ++ "::") { QualifiedName { id: parser.fresh(), components: names.into_iter().map(|name| Rc::new(name.to_string())).collect() } } //TODO improve the definition of identifiers rule identifier() -> &'input str = !(reserved() !(ident_continuation())) text:$(['a'..='z' | 'A'..='Z' | '_'] ident_continuation()*) { text } rule ident_continuation() -> &'input str = text:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_']) rule reserved() = "if" / "then" / "else" / "is" / "fn" / "for" / "while" / "let" / "in" / "mut" / "return" / "break" / "alias" / "type" / "self" / "Self" / "interface" / "impl" / "true" / "false" / "module" / "import" rule if_expr(parser: &mut Parser) -> ExpressionKind = "if" _ discriminator:(expression(parser)?) _ body:if_expr_body(parser) { ExpressionKind::IfExpression { discriminator: discriminator.map(Box::new), body: Box::new(body), } } rule if_expr_body(parser: &mut Parser) -> IfExpressionBody = cond_block(parser) / simple_pattern_match(parser) / simple_conditional(parser) rule simple_conditional(parser: &mut Parser) -> IfExpressionBody = "then" _ then_case:expr_or_block(parser) _ else_case:else_case(parser) { IfExpressionBody::SimpleConditional { then_case, else_case } } rule simple_pattern_match(parser: &mut Parser) -> IfExpressionBody = "is" _ pattern:pattern(parser) _ "then" _ then_case:expr_or_block(parser) _ else_case:else_case(parser) { IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case } } rule cond_block(parser: &mut Parser) -> IfExpressionBody = "{" __ cond_arms:(cond_arm(parser) ++ (delimiter()+)) __ "}" { IfExpressionBody::CondList(cond_arms) } rule cond_arm(parser: &mut Parser) -> ConditionArm = _ "else" _ body:expr_or_block(parser) { ConditionArm { condition: Condition::Else, guard: None, body } } / _ condition:condition(parser) _ guard:condition_guard(parser) _ "then" _ body:expr_or_block(parser) { ConditionArm { condition, guard, body } } rule condition(parser: &mut Parser) -> Condition = "is" _ pat:pattern(parser) { Condition::Pattern(pat) } / op:operator() _ expr:expression(parser) { Condition::TruncatedOp(BinOp::from_sigil(op), expr) } rule condition_guard(parser: &mut Parser) -> Option = ("if" _ expr:expression(parser) { expr } )? rule expr_or_block(parser: &mut Parser) -> Block = block(parser) / pos:position!() ex:expression(parser) { Statement { id: parser.fresh() , location: pos.into(), kind: StatementKind::Expression(ex) }.into() } rule else_case(parser: &mut Parser) -> Option = ("else" _ eorb:expr_or_block(parser) { eorb })? rule pattern(parser: &mut Parser) -> Pattern = "(" _ variants:(pattern(parser) ++ ",") _ ")" { Pattern::TuplePattern(variants) } / _ pat:simple_pattern(parser) { pat } rule simple_pattern(parser: &mut Parser) -> Pattern = pattern_literal() / qn:qualified_identifier(parser) "(" members:(pattern(parser) ** ",") ")" { Pattern::TupleStruct(qn, members) } / qn:qualified_identifier(parser) _ "{" _ items:(record_pattern_entry(parser) ** ",") "}" _ { let items = items.into_iter().map(|(name, pat)| (Rc::new(name.to_string()), pat)).collect(); Pattern::Record(qn, items) } / qn:qualified_identifier(parser) { Pattern::VarOrName(qn) } rule record_pattern_entry(parser: &mut Parser) -> (&'input str, Pattern) = _ name:identifier() _ ":" _ pat:pattern(parser) _ { (name, pat) } / _ name:identifier() _ { let qn = QualifiedName { id: parser.fresh(), components: vec![Rc::new(name.to_string())], }; (name, Pattern::VarOrName(qn)) } rule pattern_literal() -> Pattern = "true" { Pattern::Literal(PatternLiteral::BoolPattern(true)) } / "false" { Pattern::Literal(PatternLiteral::BoolPattern(false)) } / s:bare_string_literal() { Pattern::Literal(PatternLiteral::StringPattern(Rc::new(s.to_string()))) } / sign:("-"?) num:(float_literal() / nat_literal()) { let neg = sign.is_some(); Pattern::Literal(PatternLiteral::NumPattern { neg, num }) } / "_" { Pattern::Ignored } rule list_expr(parser: &mut Parser) -> ExpressionKind = "[" exprs:(expression(parser) ** ",") "]" { let mut exprs = exprs; ExpressionKind::ListLiteral(exprs) } rule paren_expr(parser: &mut Parser) -> ExpressionKind = "(" exprs:(expression(parser) ** ",") ")" { let mut exprs = exprs; match exprs.len() { 1 => exprs.pop().unwrap().kind, _ => ExpressionKind::TupleLiteral(exprs), } } //TODO need to do something with prefix in the AST rule string_literal() -> ExpressionKind = prefix:identifier()? s:bare_string_literal(){ ExpressionKind::StringLiteral(Rc::new(s.to_string())) } rule bare_string_literal() -> &'input str = "\"" s:$(string_component()*) "\"" { s } rule string_component() -> &'input str = r#"\\"# { "\\" } / r#"\""# { "\"" } / r#"\t"# { "\t" } / r#"\n"# { "\n" } / ch:$([^ '"' ]) { ch } rule bool_literal() -> ExpressionKind = "true" { ExpressionKind::BoolLiteral(true) } / "false" { ExpressionKind::BoolLiteral(false) } rule nat_literal() -> ExpressionKind = bin_literal() / hex_literal() / unmarked_literal() rule unmarked_literal() -> ExpressionKind = digits:digits() { ExpressionKind::NatLiteral(digits.parse().unwrap()) } rule bin_literal() -> ExpressionKind = "0b" digits:bin_digits() {? parse_binary(digits).map(ExpressionKind::NatLiteral) } rule hex_literal() -> ExpressionKind = "0x" digits:hex_digits() {? parse_hex(digits).map(ExpressionKind::NatLiteral) } rule float_literal() -> ExpressionKind = ds:$( digits() "." digits()? / "." digits() ) { ExpressionKind::FloatLiteral(ds.parse().unwrap()) } rule digits() -> &'input str = $((digit_group() "_"*)+) rule bin_digits() -> &'input str = $((bin_digit_group() "_"*)+) rule hex_digits() -> &'input str = $((hex_digit_group() "_"*)+) rule digit_group() -> &'input str = $(['0'..='9']+) rule bin_digit_group() -> &'input str = $(['0' | '1']+) rule hex_digit_group() -> &'input str = $(['0'..='9' | 'a'..='f' | 'A'..='F']+) } } fn parse_binary(digits: &str) -> Result { let mut result: u64 = 0; let mut multiplier = 1; for d in digits.chars().rev() { match d { '1' => result += multiplier, '0' => (), '_' => continue, _ => unreachable!(), } multiplier = match multiplier.checked_mul(2) { Some(m) => m, None => return Err("Binary expression will overflow"), } } Ok(result) } fn parse_hex(digits: &str) -> Result { let mut result: u64 = 0; let mut multiplier: u64 = 1; for d in digits.chars().rev() { if d == '_' { continue; } match d.to_digit(16) { Some(n) => result += n as u64 * multiplier, None => return Err("Internal parser error: invalid hex digit"), } multiplier = match multiplier.checked_mul(16) { Some(m) => m, None => return Err("Hexadecimal expression will overflow"), } } Ok(result) } #[derive(Debug)] struct BinopSequence { first: ExpressionKind, next: Vec<(BinOp, ExpressionKind)>, } impl BinopSequence { fn do_precedence(self, parser: &mut Parser) -> ExpressionKind { fn helper( precedence: i32, lhs: ExpressionKind, rest: &mut Vec<(BinOp, ExpressionKind)>, parser: &mut Parser, ) -> Expression { let mut lhs = Expression::new(parser.fresh(), lhs); while let Some((next_op, next_rhs)) = rest.pop() { let new_precedence = next_op.get_precedence(); if precedence >= new_precedence { rest.push((next_op, next_rhs)); break; } let rhs = helper(new_precedence, next_rhs, rest, parser); lhs = Expression::new( parser.fresh(), ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)), ); } lhs } let mut as_stack = self.next.into_iter().rev().collect(); helper(BinOp::min_precedence(), self.first, &mut as_stack, parser).kind } }