schala/schala-lang/language/src/parsing/new.rs

495 lines
20 KiB
Rust

use std::rc::Rc;
use crate::ast::*;
/// Copies `s` into a freshly allocated `String` and wraps it in an `Rc`.
fn rc_string(s: &str) -> Rc<String> {
    let owned = String::from(s);
    Rc::new(owned)
}
peg::parser! {
pub grammar schala_parser() for str {
// Zero or more spaces, tabs or newlines.
rule whitespace() = [' ' | '\t' | '\n']*
// `_` skips optional whitespace, newlines included. It is wrapped in `quiet!` so that it is not
// reported as an expected token in parse errors.
rule _ = quiet!{ whitespace() }
// `__` skips optional spaces and tabs only; it never consumes a newline.
rule __ = quiet!{ [' ' | '\t' ]* }
// Entry point: statements separated by delimiters, collected into an `AST` with a default id.
pub rule program() -> AST =
n:(statement() ** delimiter() ) { AST { id: Default::default(), statements: n.into() } }
// One or more `;` or newline characters.
rule delimiter() = (";" / "\n")+
//Note - this is a hack, ideally the rule `rule block() -> Block = "{" _ items:(statement() **
//delimiter()) _ "}" { items.into() }` would've worked, but it doesn't.
// A brace-delimited block. The first alternative accepts zero or more statements that are each
// followed by a delimiter; the second accepts exactly one statement with no trailing delimiter.
pub rule block() -> Block = "{" _ items:block_item()* _ "}" { items.into() } /
"{" _ stmt:statement() _ "}" { vec![stmt].into() }
// A statement followed by at least one delimiter; yields the statement.
rule block_item() -> Statement =
stmt:statement() delimiter()+ { stmt }
// Wraps a statement kind in a `Statement` with default id and location.
rule statement() -> Statement =
kind:statement_kind() { Statement { id: Default::default(), location: Default::default(), kind } }
// Skips leading whitespace, then tries a declaration before falling back to an expression.
rule statement_kind() -> StatementKind =
_ decl:declaration() { StatementKind::Declaration(decl) } /
_ expr:expression() { StatementKind::Expression(expr) }
// Ordered choice over the declaration forms; the first one that matches wins.
rule declaration() -> Declaration =
binding() / type_decl() / annotation() / func()
// A signature followed by a block is a function declaration; a signature on its own is a
// bare function signature.
rule func() -> Declaration =
sig:func_signature() __ body:block() { Declaration::FuncDecl(sig, body) } /
sig:func_signature() { Declaration::FuncSig(sig) }
//TODO handle operators
// `fn name(params)` with an optional `: Type` annotation after the parameter list.
// The negative lookahead after "fn" enforces a word boundary: without it an identifier that
// merely starts with `fn` (e.g. a call `fnord(x)`) was parsed as a signature for `ord`.
rule func_signature() -> Signature =
    "fn" !['a'..='z' | 'A'..='Z' | '0'..='9' | '_'] _ name:identifier()
    "(" _ params:formal_params() _ ")" _ type_anno:type_anno()? {
        Signature { name: rc_string(name), operator: false, params, type_anno }
    }
// Comma-separated formal parameters (possibly none). Lists with 256 or more parameters are
// rejected with the "function-too-long" expectation.
rule formal_params() -> Vec<FormalParam> = params:(formal_param() ** (_ "," _)) {? if params.len() < 256 { Ok(params) } else {
Err("function-too-long") }
}
// A parameter name with an optional type annotation. The first alternative additionally
// requires `= <expression>` and records it as the default value.
rule formal_param() -> FormalParam =
name:identifier() _ anno:type_anno()? _ "=" expr:expression() { FormalParam { name: rc_string(name),
default: Some(expr), anno } } /
name:identifier() _ anno:type_anno()? { FormalParam { name: rc_string(name), default: None, anno } }
// `@name`, an optional parenthesised argument list, a delimiter, and then the statement the
// annotation applies to. A missing argument list is stored as an empty vector.
rule annotation() -> Declaration =
    "@" name:identifier() args:annotation_args()? delimiter() _ inner:statement() {
        Declaration::Annotation {
            name: rc_string(name),
            arguments: args.unwrap_or_default(),
            inner: Box::new(inner),
        }
    }
// Parenthesised, comma-separated expressions.
rule annotation_args() -> Vec<Expression> =
    "(" _ args:(expression() ** (_ "," _)) _ ")" { args }
// `let [mut] name [: Type] = expression`. The binding is constant unless `mut` is present.
// Both keywords are followed by a negative lookahead so they only match as whole words:
// previously `letter = 5` was parsed as a binding of `ter`, and `let mutable = 1` bound `able`
// as a mutable variable.
rule binding() -> Declaration =
    "let" !['a'..='z' | 'A'..='Z' | '0'..='9' | '_'] _
    mutable:("mut" !['a'..='z' | 'A'..='Z' | '0'..='9' | '_'])? _
    ident:identifier() _ type_anno:type_anno()? _ "=" _ expr:expression() {
        Declaration::Binding {
            name: rc_string(ident),
            constant: mutable.is_none(),
            type_anno,
            expr,
        }
    }
// A type declaration: either `type alias <alias> = <name>` or `type [mut] <Name> = <body>`.
// The alias form is tried first.
rule type_decl() -> Declaration =
"type" _ "alias" _ alias:type_alias() { alias } /
"type" _ mutable:"mut"? _ name:type_singleton_name() _ "=" _ body:type_body() {
Declaration::TypeDecl { name, body, mutable: mutable.is_some() }
}
// A type name with an optional `<...>` parameter list; no list yields an empty vector.
rule type_singleton_name() -> TypeSingletonName =
name:identifier() params:type_params()? { TypeSingletonName { name: rc_string(name), params: if let Some(params) = params { params } else { vec![] } } }
// Comma-separated type identifiers enclosed in angle brackets.
rule type_params() -> Vec<TypeIdentifier> =
"<" _ idents:(type_identifier() ** (_ "," _)) _ ">" { idents }
// Either a parenthesised tuple of type identifiers or a single (possibly parameterised) name.
rule type_identifier() -> TypeIdentifier =
"(" _ items:(type_identifier() ** (_ "," _)) _ ")" { TypeIdentifier::Tuple(items) } /
singleton:type_singleton_name() { TypeIdentifier::Singleton(singleton) }
// Right-hand side of a type declaration: a braced record with at least one field, or a
// `|`-separated list of variants (`**` also accepts an empty list).
rule type_body() -> TypeBody =
"{" _ items:(record_variant_item() ++ (_ "," _)) _ "}" { TypeBody::ImmediateRecord(Default::default(), items) } /
variants:(variant_spec() ** (_ "|" _)) { TypeBody::Variants(variants) }
// One variant, tried in this order: record form `Name { field: Type, ... }`, tuple form
// `Name(Type, ...)`, then a bare name (unit variant).
rule variant_spec() -> Variant =
name:identifier() _ "{" _ typed_identifier_list:(record_variant_item() ++ (_ "," _)) _ "}" { Variant {
id: Default::default(), name: rc_string(name), kind: VariantKind::Record(typed_identifier_list)
} } /
name:identifier() "(" tuple_members:(type_identifier() ++ (_ "," _)) ")" { Variant {
id: Default::default(), name: rc_string(name), kind: VariantKind::TupleStruct(tuple_members) } } /
name:identifier() { Variant { id: Default::default(), name: rc_string(name), kind: VariantKind::UnitStruct } }
// A `name: Type` record field.
rule record_variant_item() -> (Rc<String>, TypeIdentifier) =
name:identifier() _ ":" _ ty:type_identifier() { (rc_string(name), ty) }
// `<alias> = <name>`; both sides are plain identifiers.
rule type_alias() -> Declaration =
alias:identifier() _ "=" _ name:identifier() { Declaration::TypeAlias { alias: rc_string(alias), original: rc_string(name), } }
// `: Name` annotation. Only a bare identifier is accepted, so the resulting type never has
// parameters.
rule type_anno() -> TypeIdentifier =
":" _ ident:identifier() { TypeIdentifier::Singleton(TypeSingletonName { name: Rc::new(ident.to_string()), params: vec![] }) }
// A full expression: optional leading whitespace, then an expression kind parsed with
// `struct_ok = true`. The result carries a default id and no type annotation.
pub rule expression() -> Expression =
_ kind:expression_kind() { Expression { id: Default::default(), type_anno: None, kind: kind } }
// Same as `expression`, but parsed with `struct_ok = false`, so a named-struct literal is not
// accepted at the top level of the expression (nested `expression()` calls, e.g. inside call
// arguments, still allow them).
rule expression_no_struct() -> Expression =
_ kind:expression_kind_no_struct() { Expression { id: Default::default(), type_anno: None, kind: kind } }
// Expression kind with struct literals allowed.
rule expression_kind() -> ExpressionKind =
precedence_expr(true)
// Expression kind with struct literals disallowed.
rule expression_kind_no_struct() -> ExpressionKind =
precedence_expr(false)
// Parses a prefix expression followed by any number of (operator, prefix expression) pairs,
// converts each operator sigil to a `BinOp`, and lets `BinopSequence::do_precedence` build the
// nested expression from the flat sequence.
rule precedence_expr(struct_ok: bool) -> ExpressionKind =
first:prefix_expr(struct_ok) _ next:(precedence_continuation(struct_ok))* {
let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect();
BinopSequence { first, next }.do_precedence()
}
// One operator and its right-hand operand; trailing whitespace is consumed.
rule precedence_continuation(struct_ok: bool) -> (&'input str, ExpressionKind) =
op:operator() _ expr:prefix_expr(struct_ok) _ { (op, expr) }
// An extended expression with an optional single-character prefix operator directly in front
// of it. With a prefix the operand is wrapped in `ExpressionKind::PrefixExp`; without one the
// operand is returned unchanged.
rule prefix_expr(struct_ok: bool) -> ExpressionKind =
prefix:prefix()? expr:extended_expr(struct_ok) {
if let Some(p) = prefix {
let expr = Expression::new(Default::default(), expr);
let prefix = PrefixOp::from_sigil(p);
ExpressionKind::PrefixExp(prefix, Box::new(expr))
} else {
expr
}
}
// A prefix operator sigil: exactly one of `+`, `-` or `!`.
rule prefix() -> &'input str =
$(['+' | '-' | '!' ])
//TODO make the definition of operators more complex
// A binary operator sigil: one or more operator characters. The character class is quieted and
// replaced by a single "operator" entry in the expected set of parse errors.
rule operator() -> &'input str =
quiet!{$( ['+' | '-' | '*' | '/' | '%' | '<' | '>' | '=' | '!' | '$' | '&' | '|' | '?' | '^' | '`']+ )} /
expected!("operator")
// Dispatches on `struct_ok`. Note that the first alternative always parses with
// `extended_expr_ok_struct` and only afterwards rejects the result when `struct_ok` is false,
// at which point the second alternative re-parses with `extended_expr_no_struct`.
rule extended_expr(struct_ok: bool) -> ExpressionKind =
item:extended_expr_ok_struct() {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } /
item:extended_expr_no_struct() {? if !struct_ok { Ok(item) } else { Err("!no-struct-allowed") } }
// Postfix layer (struct literals allowed): a primary followed by index (`[...]`), call (`(...)`)
// or field-access (`.name`) suffixes. The rule is left-recursive, which is why it carries
// `#[cache_left_rec]`.
#[cache_left_rec]
rule extended_expr_ok_struct() -> ExpressionKind =
indexee:extended_expr_ok_struct() indexers:index_part() {
ExpressionKind::Index {
indexee: Box::new(Expression::new(Default::default(), indexee)),
indexers,
}
} /
f:extended_expr_ok_struct() arguments:call_part() {
ExpressionKind::Call {
f: Box::new(Expression::new(Default::default(), f)),
arguments,
}
} /
expr:extended_expr_ok_struct() "." name:identifier() { ExpressionKind::Access {
name: Rc::new(name.to_string()),
expr: Box::new(Expression::new(Default::default(),expr)),
} } /
primary(true)
// Same postfix layer as `extended_expr_ok_struct`; the only difference is that the base case is
// `primary(false)`, so a named-struct literal is not accepted as the innermost operand.
#[cache_left_rec]
rule extended_expr_no_struct() -> ExpressionKind =
indexee:extended_expr_no_struct() indexers:index_part() {
ExpressionKind::Index {
indexee: Box::new(Expression::new(Default::default(), indexee)),
indexers,
}
} /
f:extended_expr_no_struct() arguments:call_part() {
ExpressionKind::Call {
f: Box::new(Expression::new(Default::default(), f)),
arguments,
}
} /
expr:extended_expr_no_struct() "." name:identifier() { ExpressionKind::Access {
name: Rc::new(name.to_string()),
expr: Box::new(Expression::new(Default::default(),expr)),
} } /
primary(false)
// `[e1, e2, ...]` index suffix; at least one indexer expression is required.
rule index_part() -> Vec<Expression> =
"[" indexers:(expression() ++ ",") "]" { indexers }
// `(a1, a2, ...)` call suffix; the argument list may be empty.
rule call_part() -> Vec<InvocationArgument> =
"(" arguments:(invocation_argument() ** ",") ")" { arguments }
//TODO this shouldn't be an expression b/c type annotations disallowed here
// One call argument, tried in this order: the placeholder `_`, a keyword argument
// `name = expression`, then a positional expression.
// The placeholder must be a lone underscore: the negative lookahead stops it from matching the
// first character of an identifier such as `_x`, which previously made the whole call fail to
// parse.
rule invocation_argument() -> InvocationArgument =
    _ "_" !['a'..='z' | 'A'..='Z' | '0'..='9' | '_'] _ { InvocationArgument::Ignored } /
    _ ident:identifier() _ "=" _ expr:expression() {
        InvocationArgument::Keyword {
            name: Rc::new(ident.to_string()),
            expr,
        }
    } /
    _ expr:expression() _ { InvocationArgument::Positional(expr) }
// Innermost expression forms, as an ordered choice. Float literals are tried before natural
// number literals, and a named-struct literal (only accepted when `struct_ok` is true) is tried
// before a plain identifier.
rule primary(struct_ok: bool) -> ExpressionKind =
while_expr() / for_expr() / float_literal() / nat_literal() / bool_literal() / string_literal() / paren_expr() /
list_expr() / if_expr() /
item:named_struct() {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } /
identifier_expr()
// `for <enumerators> <body>`.
rule for_expr() -> ExpressionKind =
"for" _ enumerators:for_enumerators() _ body:for_body() {
ExpressionKind::ForExpression { enumerators, body }
}
// Either a braced, comma-separated list with at least one enumerator, or a single bare
// enumerator.
rule for_enumerators() -> Vec<Enumerator> =
"{" _ enumerators:(enumerator() ++ ",") _ "}" { enumerators } /
enumerator:enumerator() { vec![enumerator] }
//TODO add guards, etc.
// `name <- expression` or `name = expression`. Both forms currently build the same
// `Enumerator`; the generator is parsed without struct literals.
rule enumerator() -> Enumerator =
ident:identifier() _ "<-" _ generator:expression_no_struct() {
Enumerator { id: Rc::new(ident.to_string()), generator }
} /
//TODO need to distinguish these two cases in AST
ident:identifier() _ "=" _ generator:expression_no_struct() {
Enumerator { id: Rc::new(ident.to_string()), generator }
}
// `return <expression>` gives a monadic-return body; otherwise the body is a statement block.
rule for_body() -> Box<ForBody> =
"return" _ expr:expression() { Box::new(ForBody::MonadicReturn(expr)) } /
body:block() { Box::new(ForBody::StatementBlock(body)) }
// `while [condition] { ... }`. The condition is optional and is parsed without struct literals;
// when present it is wrapped in an `Expression` with a default id.
rule while_expr() -> ExpressionKind =
"while" _ cond:expression_kind_no_struct()? _ body:block() {
ExpressionKind::WhileExpression {
condition: cond.map(|kind| Box::new(Expression::new(Default::default(), kind))),
body,
}
}
// A (possibly qualified) name used as a value.
rule identifier_expr() -> ExpressionKind =
qn:qualified_identifier() { ExpressionKind::Value(qn) }
// A named-struct literal: a qualified name followed by a braced record block.
rule named_struct() -> ExpressionKind =
name:qualified_identifier() _ fields:record_block() {
ExpressionKind::NamedStruct {
name,
fields: fields.into_iter().map(|(n, exp)| (Rc::new(n.to_string()), exp)).collect(),
}
}
//TODO anonymous structs, update syntax for structs
// `{ name: expr, ... }`; the entry list may be empty.
rule record_block() -> Vec<(&'input str, Expression)> =
"{" _ entries:(record_entry() ** ",") _ "}" { entries }
// A single `name: expression` entry with surrounding whitespace allowed.
rule record_entry() -> (&'input str, Expression) =
_ name:identifier() _ ":" _ expr:expression() _ { (name, expr) }
// One or more identifiers joined by `::` (no whitespace is allowed around the separator).
rule qualified_identifier() -> QualifiedName =
names:(identifier() ++ "::") { QualifiedName { id: Default::default(), components: names.into_iter().map(|name| Rc::new(name.to_string())).collect() } }
//TODO improve the definition of identifiers
// An ASCII letter or underscore followed by any number of ASCII letters, digits or underscores.
rule identifier() -> &'input str =
$(['a'..='z' | 'A'..='Z' | '_'] ['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*)
// `if [discriminator] <body>`. The discriminator expression is optional.
rule if_expr() -> ExpressionKind =
"if" _ discriminator:(expression()?) _ body:if_expr_body() {
ExpressionKind::IfExpression {
discriminator: discriminator.map(Box::new),
body: Box::new(body),
}
}
// The three body forms, tried in order: braced condition list, `is <pattern> then ...`,
// and plain `then ...`.
rule if_expr_body() -> IfExpressionBody =
cond_block() / simple_pattern_match() / simple_conditional()
// `then <expr-or-block> [else <expr-or-block>]`.
rule simple_conditional() -> IfExpressionBody =
"then" _ then_case:expr_or_block() _ else_case:else_case() {
IfExpressionBody::SimpleConditional { then_case, else_case }
}
// `is <pattern> then <expr-or-block> [else <expr-or-block>]`.
rule simple_pattern_match() -> IfExpressionBody =
"is" _ pattern:pattern() _ "then" _ then_case:expr_or_block() _ else_case:else_case() {
IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case }
}
// `{ arm, arm, ... }` with at least one comma-separated condition arm.
rule cond_block() -> IfExpressionBody =
"{" _ cond_arms:(cond_arm() ++ ",") _ "}" { IfExpressionBody::CondList(cond_arms) }
// Either `else <body>` or `<condition> [if <guard>] then <body>`.
rule cond_arm() -> ConditionArm =
_ "else" _ body:expr_or_block() { ConditionArm { condition: Condition::Else, guard: None, body } } /
_ condition:condition() _ guard:condition_guard() _ "then" _ body:expr_or_block()
{ ConditionArm { condition, guard, body } }
// `is <pattern>`, or an operator followed by an expression (a binary operation whose left
// operand is left out).
rule condition() -> Condition =
"is" _ pat:pattern() { Condition::Pattern(pat) } /
op:operator() _ expr:expression() { Condition::TruncatedOp(BinOp::from_sigil(op), expr) }
// Optional `if <expression>` guard.
rule condition_guard() -> Option<Expression> =
("if" _ expr:expression() { expr } )?
// A block, or a single expression that is wrapped in an expression statement and converted
// into a block.
rule expr_or_block() -> Block = block() / ex:expression() {
Statement {
id: Default::default(), location: Default::default(),
kind: StatementKind::Expression(ex)
}.into()
}
// Optional `else <expr-or-block>`.
rule else_case() -> Option<Block> =
("else" _ eorb:expr_or_block() { eorb })?
// A parenthesised tuple of one or more patterns, or a simple pattern preceded by optional
// whitespace.
rule pattern() -> Pattern =
"(" _ variants:(pattern() ++ ",") _ ")" { Pattern::TuplePattern(variants) } /
_ pat:simple_pattern() { pat }
// Tried in order: a literal, a tuple-struct pattern `Name(p, ...)`, a record pattern
// `Name { ... }`, then a bare qualified name.
rule simple_pattern() -> Pattern =
pattern_literal() /
qn:qualified_identifier() "(" members:(pattern() ** ",") ")" {
Pattern::TupleStruct(qn, members)
} /
qn:qualified_identifier() _ "{" _ items:(record_pattern_entry() ** ",") "}" _ {
let items = items.into_iter().map(|(name, pat)| (Rc::new(name.to_string()), pat)).collect();
Pattern::Record(qn, items)
} /
qn:qualified_identifier() { Pattern::VarOrName(qn) }
// `name: pattern`, or the shorthand `name`, which expands to a `VarOrName` pattern for a
// single-component qualified name built from the field name.
rule record_pattern_entry() -> (&'input str, Pattern) =
_ name:identifier() _ ":" _ pat:pattern() _ { (name, pat) } /
_ name:identifier() _ {
let qn = QualifiedName {
id: Default::default(),
components: vec![Rc::new(name.to_string())],
};
(name, Pattern::VarOrName(qn))
}
// Literal patterns: `true`, `false`, a string literal, an optionally negated natural number,
// or the wildcard `_`.
// `true`, `false` and `_` carry a negative lookahead so they only match as whole words.
// `simple_pattern` tries this rule first, so without the lookahead a name pattern that merely
// starts with one of them (e.g. `_rest`) was cut short here and the enclosing rule then failed.
rule pattern_literal() -> Pattern =
    "true" !['a'..='z' | 'A'..='Z' | '0'..='9' | '_'] {
        Pattern::Literal(PatternLiteral::BoolPattern(true))
    } /
    "false" !['a'..='z' | 'A'..='Z' | '0'..='9' | '_'] {
        Pattern::Literal(PatternLiteral::BoolPattern(false))
    } /
    s:bare_string_literal() {
        Pattern::Literal(PatternLiteral::StringPattern(Rc::new(s.to_string())))
    } /
    sign:("-"?) num:nat_literal() {
        let neg = sign.is_some();
        Pattern::Literal(PatternLiteral::NumPattern { neg, num })
    } /
    "_" !['a'..='z' | 'A'..='Z' | '0'..='9' | '_'] { Pattern::Ignored }
// `[e1, e2, ...]` list literal; the element list may be empty.
// The former `let mut exprs = exprs;` rebinding was never mutated and only produced an
// `unused_mut` warning, so the captured vector is used directly.
rule list_expr() -> ExpressionKind =
    "[" exprs:(expression() ** ",") "]" { ExpressionKind::ListLiteral(exprs) }
// Parenthesised, comma-separated expressions. Exactly one expression is unwrapped to its own
// kind (plain grouping); any other count, including zero, becomes a tuple literal.
rule paren_expr() -> ExpressionKind =
    "(" exprs:(expression() ** ",") ")" {
        let mut exprs = exprs;
        if exprs.len() == 1 {
            exprs.remove(0).kind
        } else {
            ExpressionKind::TupleLiteral(exprs)
        }
    }
// A string literal expression; the stored text excludes the surrounding quotes.
rule string_literal() -> ExpressionKind =
s:bare_string_literal(){ ExpressionKind::StringLiteral(Rc::new(s.to_string())) }
//TODO string escapes, prefixes
// Everything between two double quotes. The contents may be any characters other than `"`
// (newlines included), and there is no escape mechanism.
rule bare_string_literal() -> &'input str =
"\"" items:$([^ '"' ]*) "\"" { items }
// `true` or `false`.
// The negative lookahead enforces a word boundary. `primary` tries this rule before
// `identifier_expr`, so without it an identifier such as `true_value` was consumed as the
// literal `true` and the remainder caused a parse error.
rule bool_literal() -> ExpressionKind =
    "true" !['a'..='z' | 'A'..='Z' | '0'..='9' | '_'] { ExpressionKind::BoolLiteral(true) } /
    "false" !['a'..='z' | 'A'..='Z' | '0'..='9' | '_'] { ExpressionKind::BoolLiteral(false) }
// A natural number literal. The prefixed forms (`0b...`, `0x...`) are tried before the unmarked
// decimal form, which would otherwise match just the leading `0`.
rule nat_literal() -> ExpressionKind =
bin_literal() / hex_literal() / unmarked_literal()
// A decimal natural number literal.
// `digits()` admits `_` separators, which the integer parser rejects, so they are stripped
// before conversion. A value that still fails to convert (i.e. does not fit the literal's
// integer type) is reported as a parse failure; the previous `unwrap()` panicked in both cases.
rule unmarked_literal() -> ExpressionKind =
    digits:digits() {?
        digits
            .replace('_', "")
            .parse()
            .map(ExpressionKind::NatLiteral)
            .map_err(|_| "natural number literal within range")
    }
// `0b` followed by binary digits (with optional `_` separators); conversion is delegated to
// `parse_binary`.
rule bin_literal() -> ExpressionKind =
"0b" digits:bin_digits() { ExpressionKind::NatLiteral(parse_binary(digits)) }
// `0x` followed by hexadecimal digits (with optional `_` separators); conversion is delegated
// to `parse_hex`.
rule hex_literal() -> ExpressionKind =
"0x" digits:hex_digits() { ExpressionKind::NatLiteral(parse_hex(digits)) }
// A float literal: `digits.`, `digits.digits` or `.digits`.
// `digits()` admits `_` separators, which the float parser rejects, so they are stripped before
// conversion; the previous `unwrap()` panicked on inputs such as `1_000.5`. A conversion
// failure is reported as a parse failure rather than a panic.
rule float_literal() -> ExpressionKind =
    ds:$( digits() "." digits()? / "." digits() ) {?
        ds.replace('_', "")
            .parse()
            .map(ExpressionKind::FloatLiteral)
            .map_err(|_| "valid float literal")
    }
// One or more digit groups, each optionally followed by `_` separators. The matched text must
// start with a digit; underscores may appear between groups and at the end, and are included
// in the returned slice.
rule digits() -> &'input str = $((digit_group() "_"*)+)
// Same shape as `digits`, restricted to binary digits.
rule bin_digits() -> &'input str = $((bin_digit_group() "_"*)+)
// Same shape as `digits`, restricted to hexadecimal digits.
rule hex_digits() -> &'input str = $((hex_digit_group() "_"*)+)
// One or more decimal digits.
rule digit_group() -> &'input str = $(['0'..='9']+)
// One or more binary digits.
rule bin_digit_group() -> &'input str = $(['0' | '1']+)
// One or more hexadecimal digits, in either case.
rule hex_digit_group() -> &'input str = $(['0'..='9' | 'a'..='f' | 'A'..='F']+)
}
}
/// Converts the digits of a binary literal (the part after `0b`) into a `u64`.
///
/// `_` separators are skipped. Digits are folded in from the most significant end, so overflow
/// is only detected when the value itself does not fit: a full 64-digit literal and literals
/// padded with leading zeros are accepted. (The previous implementation advanced a multiplier
/// after every digit and panicked on any 64-digit input.)
///
/// # Panics
///
/// Panics if the value does not fit in a `u64`, or if `digits` contains a character other than
/// `0`, `1` or `_` (the grammar never passes such input).
//TODO report overflow as a parse error (with the offending token) instead of panicking
fn parse_binary(digits: &str /*, tok: Token*/) -> u64 {
    digits
        .chars()
        .filter(|&c| c != '_')
        .fold(0u64, |acc, c| {
            let bit = match c {
                '0' => 0,
                '1' => 1,
                _ => unreachable!(),
            };
            acc.checked_mul(2)
                .and_then(|shifted| shifted.checked_add(bit))
                .expect("binary literal does not fit in a u64")
        })
}
//TODO report overflow as a parse error instead of panicking
/// Converts the digits of a hexadecimal literal (the part after `0x`) into a `u64`.
///
/// `_` separators are skipped and both letter cases are accepted. Digits are folded in from the
/// most significant end, so overflow is only detected when the value itself does not fit: a full
/// 16-digit literal and literals padded with leading zeros are accepted. (The previous
/// implementation advanced a multiplier after every digit and panicked on any 16-digit input.)
///
/// # Panics
///
/// Panics if the value does not fit in a `u64`, or if `digits` contains a character that is
/// neither a hexadecimal digit nor `_` (the grammar never passes such input).
fn parse_hex(digits: &str) -> u64 {
    digits
        .chars()
        .filter(|&c| c != '_')
        .fold(0u64, |acc, c| {
            let nibble = c
                .to_digit(16)
                .map(u64::from)
                .expect("hexadecimal literal may only contain hex digits and `_`");
            acc.checked_mul(16)
                .and_then(|shifted| shifted.checked_add(nibble))
                .expect("hexadecimal literal does not fit in a u64")
        })
}
/// A flat run of binary operations as built by the `precedence_expr` grammar rule: the first
/// operand followed by (operator, operand) pairs in source order.
#[derive(Debug)]
struct BinopSequence {
    /// Leftmost operand.
first: ExpressionKind,
    /// Each remaining operator paired with the operand that follows it.
next: Vec<(BinOp, ExpressionKind)>,
}
impl BinopSequence {
    /// Folds the flat operand/operator sequence into a nested `BinExp` tree according to each
    /// operator's precedence, and returns the kind of the resulting expression.
    fn do_precedence(self) -> ExpressionKind {
        /// Builds the expression that starts at `first`, consuming operators from the end of
        /// `pending` while they bind more tightly than `min_precedence`. An operator whose
        /// precedence is equal or lower is pushed back for the caller to handle.
        fn climb(
            min_precedence: i32,
            first: ExpressionKind,
            pending: &mut Vec<(BinOp, ExpressionKind)>,
        ) -> Expression {
            let mut acc = Expression::new(Default::default(), first);
            while let Some((op, operand)) = pending.pop() {
                let op_precedence = op.get_precedence();
                if op_precedence <= min_precedence {
                    // Not ours to consume: hand it back and stop.
                    pending.push((op, operand));
                    break;
                }
                let rhs = climb(op_precedence, operand, pending);
                let combined = ExpressionKind::BinExp(op, Box::new(acc), Box::new(rhs));
                acc = Expression::new(Default::default(), combined);
            }
            acc
        }

        // Reversed so that `pop` yields the pairs in source order.
        let mut pending: Vec<(BinOp, ExpressionKind)> = self.next.into_iter().rev().collect();
        climb(BinOp::min_precedence(), self.first, &mut pending).kind
    }
}