503 lines
21 KiB
Rust
503 lines
21 KiB
Rust
use std::rc::Rc;
|
|
|
|
use crate::ast::*;
|
|
|
|
/// Copies `s` into a freshly allocated `String` and wraps it in an `Rc`.
///
/// The grammar's semantic actions use this to turn borrowed slices of the
/// input into the owned, shareable names stored in AST nodes.
fn rc_string(s: &str) -> Rc<String> {
    Rc::new(s.to_owned())
}
|
|
|
|
peg::parser! {
    // Grammar `schala_parser`: parses source text into the AST types imported from `crate::ast`.
    // The public entry points are `program`, `block` and `expression`.
    pub grammar schala_parser() for str {

        // Zero or more spaces, tabs or newlines.
        rule whitespace() = [' ' | '\t' | '\n']*

        // Optional whitespace, newlines included; kept out of error reports by `quiet!`.
        rule _ = quiet!{ whitespace() }

        // Optional spaces/tabs only; unlike `_` this never crosses a newline.
        rule __ = quiet!{ [' ' | '\t' ]* }

        // A whole program: statements separated by delimiters.
        pub rule program() -> AST =
            statements:(statement() ** delimiter() ) { AST { id: Default::default(), statements: statements.into() } }

        // One or more `;` or newline characters.
        rule delimiter() = (";" / "\n")+

        //Note - this is a hack, ideally the rule `rule block() -> Block = "{" _ items:(statement() **
        //delimiter()) _ "}" { items.into() }` would've worked, but it doesn't.
        pub rule block() -> Block = "{" _ items:block_item()* _ "}" { items.into() } /
            "{" _ stmt:statement() _ "}" { vec![stmt].into() }

        // A statement inside a block; it must be followed by at least one delimiter.
        rule block_item() -> Statement =
            stmt:statement() delimiter()+ { stmt }

        rule statement() -> Statement =
            kind:statement_kind() { Statement { id: Default::default(), location: Default::default(), kind } }

        // Declarations are tried before expressions.
        rule statement_kind() -> StatementKind =
            _ decl:declaration() { StatementKind::Declaration(decl) } /
            _ expr:expression() { StatementKind::Expression(expr) }

        rule declaration() -> Declaration =
            binding() / type_decl() / annotation() / func()

        // A signature followed by a block is a definition; a bare signature is a declaration.
        rule func() -> Declaration =
            sig:func_signature() __ body:block() { Declaration::FuncDecl(sig, body) } /
            sig:func_signature() { Declaration::FuncSig(sig) }

        //TODO handle operators
        rule func_signature() -> Signature =
            "fn" _ name:identifier() "(" _ params:formal_params() _ ")" _ type_anno:type_anno()? { Signature {
                name: rc_string(name), operator: false, params, type_anno
            } }

        // Comma-separated formal parameters; lists of 256 or more parameters are rejected.
        rule formal_params() -> Vec<FormalParam> = params:(formal_param() ** (_ "," _)) {?
            if params.len() < 256 { Ok(params) } else { Err("function-too-long") }
        }

        // `name [: Type] = default` is tried first, then `name [: Type]`.
        rule formal_param() -> FormalParam =
            name:identifier() _ anno:type_anno()? _ "=" expr:expression() { FormalParam {
                name: rc_string(name), default: Some(expr), anno
            } } /
            name:identifier() _ anno:type_anno()? { FormalParam { name: rc_string(name), default: None, anno } }

        // `@name(args)` followed by a delimiter and the statement it annotates.
        rule annotation() -> Declaration =
            "@" name:identifier() args:annotation_args()? delimiter() _ inner:statement() { Declaration::Annotation {
                name: rc_string(name), arguments: args.unwrap_or_default(), inner: Box::new(inner) }
            }

        rule annotation_args() -> Vec<Expression> =
            "(" _ args:(expression() ** (_ "," _)) _ ")" { args }

        // `let [mut] name [: Type] = expr`; the binding is constant unless `mut` is present.
        rule binding() -> Declaration =
            "let" _ mutable:"mut"? _ ident:identifier() _ type_anno:type_anno()? _ "=" _ expr:expression() {
                Declaration::Binding { name: rc_string(ident), constant: mutable.is_none(),
                    type_anno, expr }
            }

        // `type alias A = B`, or `type [mut] Name<Params> = body`.
        rule type_decl() -> Declaration =
            "type" _ "alias" _ alias:type_alias() { alias } /
            "type" _ mutable:"mut"? _ name:type_singleton_name() _ "=" _ body:type_body() {
                Declaration::TypeDecl { name, body, mutable: mutable.is_some() }
            }

        // A type name with an optional `<...>` parameter list (empty when omitted).
        rule type_singleton_name() -> TypeSingletonName =
            name:identifier() params:type_params()? { TypeSingletonName { name: rc_string(name), params: params.unwrap_or_default() } }

        rule type_params() -> Vec<TypeIdentifier> =
            "<" _ idents:(type_identifier() ** (_ "," _)) _ ">" { idents }

        // Either a parenthesised tuple of types or a single (possibly parameterised) name.
        rule type_identifier() -> TypeIdentifier =
            "(" _ items:(type_identifier() ** (_ "," _)) _ ")" { TypeIdentifier::Tuple(items) } /
            singleton:type_singleton_name() { TypeIdentifier::Singleton(singleton) }

        // `{ field: Type, ... }` is an immediate record; otherwise `|`-separated variants.
        rule type_body() -> TypeBody =
            "{" _ items:(record_variant_item() ++ (_ "," _)) _ "}" { TypeBody::ImmediateRecord(Default::default(), items) } /
            variants:(variant_spec() ** (_ "|" _)) { TypeBody::Variants(variants) }

        // Record variant, tuple variant, or unit variant - tried in that order.
        rule variant_spec() -> Variant =
            name:identifier() _ "{" _ typed_identifier_list:(record_variant_item() ++ (_ "," _)) _ "}" { Variant {
                id: Default::default(), name: rc_string(name), kind: VariantKind::Record(typed_identifier_list)
            } } /
            name:identifier() "(" tuple_members:(type_identifier() ++ (_ "," _)) ")" { Variant {
                id: Default::default(), name: rc_string(name), kind: VariantKind::TupleStruct(tuple_members) } } /
            name:identifier() { Variant { id: Default::default(), name: rc_string(name), kind: VariantKind::UnitStruct } }

        rule record_variant_item() -> (Rc<String>, TypeIdentifier) =
            name:identifier() _ ":" _ ty:type_identifier() { (rc_string(name), ty) }

        rule type_alias() -> Declaration =
            alias:identifier() _ "=" _ name:identifier() { Declaration::TypeAlias { alias: rc_string(alias), original: rc_string(name), } }

        // `: Name` - only a bare identifier is accepted here, without type parameters.
        rule type_anno() -> TypeIdentifier =
            ":" _ ident:identifier() { TypeIdentifier::Singleton(TypeSingletonName { name: rc_string(ident), params: vec![] }) }

        pub rule expression() -> Expression =
            _ kind:expression_kind() { Expression { id: Default::default(), type_anno: None, kind } }

        // Same as `expression`, but named-struct literals are not accepted.
        rule expression_no_struct() -> Expression =
            _ kind:expression_kind_no_struct() { Expression { id: Default::default(), type_anno: None, kind } }

        rule expression_kind() -> ExpressionKind =
            precedence_expr(true)

        rule expression_kind_no_struct() -> ExpressionKind =
            precedence_expr(false)

        // Collects `operand (operator operand)*` flat, then lets `BinopSequence::do_precedence`
        // build the nested binary-expression tree.
        rule precedence_expr(struct_ok: bool) -> ExpressionKind =
            first:prefix_expr(struct_ok) _ next:(precedence_continuation(struct_ok))* {
                let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect();
                BinopSequence { first, next }.do_precedence()
            }

        rule precedence_continuation(struct_ok: bool) -> (&'input str, ExpressionKind) =
            op:operator() _ expr:prefix_expr(struct_ok) _ { (op, expr) }

        // An operand with at most one leading prefix operator.
        rule prefix_expr(struct_ok: bool) -> ExpressionKind =
            prefix:prefix()? expr:extended_expr(struct_ok) {
                if let Some(p) = prefix {
                    let expr = Expression::new(Default::default(), expr);
                    let prefix = PrefixOp::from_sigil(p);
                    ExpressionKind::PrefixExp(prefix, Box::new(expr))
                } else {
                    expr
                }
            }

        rule prefix() -> &'input str =
            $(['+' | '-' | '!' ])

        //TODO make the definition of operators more complex
        rule operator() -> &'input str =
            quiet!{$( ['+' | '-' | '*' | '/' | '%' | '<' | '>' | '=' | '!' | '$' | '&' | '|' | '?' | '^' | '`']+ )} /
            expected!("operator")

        // Selects the struct-allowing or struct-forbidding postfix rule according to `struct_ok`.
        // NOTE(review): the two near-identical rules below presumably exist because a
        // `#[cache_left_rec]` rule cannot take arguments - confirm against the peg documentation.
        rule extended_expr(struct_ok: bool) -> ExpressionKind =
            item:extended_expr_ok_struct() {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } /
            item:extended_expr_no_struct() {? if !struct_ok { Ok(item) } else { Err("!no-struct-allowed") } }

        // Left-recursive postfix chain (index, call, field access) over `primary(true)`.
        #[cache_left_rec]
        rule extended_expr_ok_struct() -> ExpressionKind =
            indexee:extended_expr_ok_struct() indexers:index_part() {
                ExpressionKind::Index {
                    indexee: Box::new(Expression::new(Default::default(), indexee)),
                    indexers,
                }
            } /
            f:extended_expr_ok_struct() arguments:call_part() {
                ExpressionKind::Call {
                    f: Box::new(Expression::new(Default::default(), f)),
                    arguments,
                }
            } /
            expr:extended_expr_ok_struct() "." name:identifier() { ExpressionKind::Access {
                name: rc_string(name),
                expr: Box::new(Expression::new(Default::default(), expr)),
            } } /
            primary(true)

        // Same postfix chain as above, over `primary(false)`.
        #[cache_left_rec]
        rule extended_expr_no_struct() -> ExpressionKind =
            indexee:extended_expr_no_struct() indexers:index_part() {
                ExpressionKind::Index {
                    indexee: Box::new(Expression::new(Default::default(), indexee)),
                    indexers,
                }
            } /
            f:extended_expr_no_struct() arguments:call_part() {
                ExpressionKind::Call {
                    f: Box::new(Expression::new(Default::default(), f)),
                    arguments,
                }
            } /
            expr:extended_expr_no_struct() "." name:identifier() { ExpressionKind::Access {
                name: rc_string(name),
                expr: Box::new(Expression::new(Default::default(), expr)),
            } } /
            primary(false)

        // `[e1, e2, ...]` with at least one index expression.
        rule index_part() -> Vec<Expression> =
            "[" indexers:(expression() ++ ",") "]" { indexers }

        // `(arg, arg, ...)`, possibly empty.
        rule call_part() -> Vec<InvocationArgument> =
            "(" arguments:(invocation_argument() ** ",") ")" { arguments }

        //TODO this shouldn't be an expression b/c type annotations disallowed here
        // Tried in order: ignored `_`, keyword `name = expr`, positional `expr`.
        rule invocation_argument() -> InvocationArgument =
            _ "_" _ { InvocationArgument::Ignored } /
            _ ident:identifier() _ "=" _ expr:expression() { InvocationArgument::Keyword {
                name: rc_string(ident),
                expr
            } } /
            _ expr:expression() _ { InvocationArgument::Positional(expr) }

        // Ordered choice: `float_literal` precedes `nat_literal`, and `named_struct` precedes
        // `identifier_expr`, so the longer form is attempted first. A named struct is only
        // accepted when `struct_ok` is true.
        rule primary(struct_ok: bool) -> ExpressionKind =
            while_expr() / for_expr() / float_literal() / nat_literal() / bool_literal() / string_literal() / paren_expr() /
            list_expr() / if_expr() / lambda_expr() /
            item:named_struct() {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } /
            identifier_expr()

        // `\(params) [: Type] { body }`, or `\param [: Type] { body }` for a single parameter.
        rule lambda_expr() -> ExpressionKind =
            r#"\"# __ "(" _ params:formal_params() _ ")" _ type_anno:(type_anno()?) _ body:block() {
                ExpressionKind::Lambda { params, type_anno, body }
            } /
            r#"\"# param:formal_param() _ type_anno:(type_anno()?) _ body:block() {
                ExpressionKind::Lambda { params: vec![param], type_anno, body }
            }

        rule for_expr() -> ExpressionKind =
            "for" _ enumerators:for_enumerators() _ body:for_body() {
                ExpressionKind::ForExpression { enumerators, body }
            }

        // Either a braced, comma-separated list of enumerators or a single bare enumerator.
        rule for_enumerators() -> Vec<Enumerator> =
            "{" _ enumerators:(enumerator() ++ ",") _ "}" { enumerators } /
            enumerator:enumerator() { vec![enumerator] }

        //TODO add guards, etc.
        // The generator uses the no-struct expression form.
        rule enumerator() -> Enumerator =
            ident:identifier() _ "<-" _ generator:expression_no_struct() {
                Enumerator { id: rc_string(ident), generator }
            } /
            //TODO need to distinguish these two cases in AST
            ident:identifier() _ "=" _ generator:expression_no_struct() {
                Enumerator { id: rc_string(ident), generator }
            }

        rule for_body() -> Box<ForBody> =
            "return" _ expr:expression() { Box::new(ForBody::MonadicReturn(expr)) } /
            body:block() { Box::new(ForBody::StatementBlock(body)) }

        // `while [cond] { body }`; the condition is optional and uses the no-struct form.
        rule while_expr() -> ExpressionKind =
            "while" _ cond:expression_kind_no_struct()? _ body:block() {
                ExpressionKind::WhileExpression {
                    condition: cond.map(|kind| Box::new(Expression::new(Default::default(), kind))),
                    body,
                }
            }

        rule identifier_expr() -> ExpressionKind =
            qn:qualified_identifier() { ExpressionKind::Value(qn) }

        // `Qualified::Name { field: expr, ... }`.
        rule named_struct() -> ExpressionKind =
            name:qualified_identifier() _ fields:record_block() {
                ExpressionKind::NamedStruct {
                    name,
                    fields: fields.into_iter().map(|(n, exp)| (rc_string(n), exp)).collect(),
                }
            }

        //TODO anonymous structs, update syntax for structs
        rule record_block() -> Vec<(&'input str, Expression)> =
            "{" _ entries:(record_entry() ** ",") _ "}" { entries }

        rule record_entry() -> (&'input str, Expression) =
            _ name:identifier() _ ":" _ expr:expression() _ { (name, expr) }

        // One or more identifiers joined by `::`.
        rule qualified_identifier() -> QualifiedName =
            names:(identifier() ++ "::") { QualifiedName { id: Default::default(), components: names.into_iter().map(rc_string).collect() } }

        //TODO improve the definition of identifiers
        // An ASCII letter or `_`, followed by ASCII letters, digits or `_`.
        rule identifier() -> &'input str =
            $(['a'..='z' | 'A'..='Z' | '_'] ['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*)

        // `if [discriminator] <body>`; the discriminator is optional.
        rule if_expr() -> ExpressionKind =
            "if" _ discriminator:(expression()?) _ body:if_expr_body() {
                ExpressionKind::IfExpression {
                    discriminator: discriminator.map(Box::new),
                    body: Box::new(body),
                }
            }

        rule if_expr_body() -> IfExpressionBody =
            cond_block() / simple_pattern_match() / simple_conditional()

        // `then <case> [else <case>]`.
        rule simple_conditional() -> IfExpressionBody =
            "then" _ then_case:expr_or_block() _ else_case:else_case() {
                IfExpressionBody::SimpleConditional { then_case, else_case }
            }

        // `is <pattern> then <case> [else <case>]`.
        rule simple_pattern_match() -> IfExpressionBody =
            "is" _ pattern:pattern() _ "then" _ then_case:expr_or_block() _ else_case:else_case() {
                IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case }
            }

        // `{ arm, arm, ... }` with at least one arm.
        rule cond_block() -> IfExpressionBody =
            "{" _ cond_arms:(cond_arm() ++ ",") _ "}" { IfExpressionBody::CondList(cond_arms) }

        // `else <body>`, or `<condition> [if <guard>] then <body>`.
        rule cond_arm() -> ConditionArm =
            _ "else" _ body:expr_or_block() { ConditionArm { condition: Condition::Else, guard: None, body } } /
            _ condition:condition() _ guard:condition_guard() _ "then" _ body:expr_or_block()
                { ConditionArm { condition, guard, body } }

        // `is <pattern>`, or a truncated binary operation `<operator> <expr>`.
        rule condition() -> Condition =
            "is" _ pat:pattern() { Condition::Pattern(pat) } /
            op:operator() _ expr:expression() { Condition::TruncatedOp(BinOp::from_sigil(op), expr) }

        rule condition_guard() -> Option<Expression> =
            ("if" _ expr:expression() { expr } )?

        // A block, or a single expression wrapped into a one-statement block.
        rule expr_or_block() -> Block = block() / ex:expression() {
            Statement {
                id: Default::default(), location: Default::default(),
                kind: StatementKind::Expression(ex)
            }.into()
        }

        rule else_case() -> Option<Block> =
            ("else" _ eorb:expr_or_block() { eorb })?

        // A parenthesised tuple of patterns, or a simple pattern.
        rule pattern() -> Pattern =
            "(" _ variants:(pattern() ++ ",") _ ")" { Pattern::TuplePattern(variants) } /
            _ pat:simple_pattern() { pat }

        // Tried in order: literal, tuple-struct `Name(p, ...)`, record `Name { ... }`, bare name.
        rule simple_pattern() -> Pattern =
            pattern_literal() /
            qn:qualified_identifier() "(" members:(pattern() ** ",") ")" {
                Pattern::TupleStruct(qn, members)
            } /
            qn:qualified_identifier() _ "{" _ items:(record_pattern_entry() ** ",") "}" _ {
                let items = items.into_iter().map(|(name, pat)| (rc_string(name), pat)).collect();
                Pattern::Record(qn, items)
            } /
            qn:qualified_identifier() { Pattern::VarOrName(qn) }

        // `name: pattern`, or shorthand `name`, which yields a `VarOrName` pattern for that name.
        rule record_pattern_entry() -> (&'input str, Pattern) =
            _ name:identifier() _ ":" _ pat:pattern() _ { (name, pat) } /
            _ name:identifier() _ {
                let qn = QualifiedName {
                    id: Default::default(),
                    components: vec![rc_string(name)],
                };
                (name, Pattern::VarOrName(qn))
            }

        // `true`, `false`, a string, an optionally negated natural-number literal, or `_`.
        rule pattern_literal() -> Pattern =
            "true" { Pattern::Literal(PatternLiteral::BoolPattern(true)) } /
            "false" { Pattern::Literal(PatternLiteral::BoolPattern(false)) } /
            s:bare_string_literal() { Pattern::Literal(PatternLiteral::StringPattern(rc_string(s))) } /
            sign:("-"?) num:nat_literal() {
                let neg = sign.is_some();
                Pattern::Literal(PatternLiteral::NumPattern { neg, num })
            } /
            "_" { Pattern::Ignored }

        rule list_expr() -> ExpressionKind =
            "[" exprs:(expression() ** ",") "]" {
                ExpressionKind::ListLiteral(exprs)
            }

        // Exactly one parenthesised expression is unwrapped to that expression; any other
        // count (including zero) becomes a tuple literal.
        rule paren_expr() -> ExpressionKind =
            "(" exprs:(expression() ** ",") ")" {
                let mut exprs = exprs;
                match exprs.len() {
                    1 => exprs.pop().unwrap().kind,
                    _ => ExpressionKind::TupleLiteral(exprs),
                }
            }

        rule string_literal() -> ExpressionKind =
            s:bare_string_literal() { ExpressionKind::StringLiteral(rc_string(s)) }

        //TODO string escapes, prefixes
        // Everything between two double quotes; the content may not contain a double quote.
        rule bare_string_literal() -> &'input str =
            "\"" items:$([^ '"' ]*) "\"" { items }

        rule bool_literal() -> ExpressionKind =
            "true" { ExpressionKind::BoolLiteral(true) } / "false" { ExpressionKind::BoolLiteral(false) }

        // `0b...` and `0x...` are tried before plain decimal digits.
        rule nat_literal() -> ExpressionKind =
            bin_literal() / hex_literal() / unmarked_literal()

        // Decimal literal. `digits()` admits `_` separators, which `str::parse` rejects, so they
        // are stripped first; a value that still fails to parse (i.e. one that is too large)
        // fails the rule instead of panicking.
        rule unmarked_literal() -> ExpressionKind =
            digits:digits() {?
                digits.replace('_', "").parse()
                    .map(ExpressionKind::NatLiteral)
                    .map_err(|_| "natural number literal that fits in 64 bits")
            }

        // Overflow handling for these two lives in `parse_binary` / `parse_hex`.
        rule bin_literal() -> ExpressionKind =
            "0b" digits:bin_digits() { ExpressionKind::NatLiteral(parse_binary(digits)) }

        rule hex_literal() -> ExpressionKind =
            "0x" digits:hex_digits() { ExpressionKind::NatLiteral(parse_hex(digits)) }

        // `digits.`, `digits.digits` or `.digits`. As with `unmarked_literal`, `_` separators are
        // stripped before parsing and a parse failure fails the rule instead of panicking.
        rule float_literal() -> ExpressionKind =
            ds:$( digits() "." digits()? / "." digits() ) {?
                ds.replace('_', "").parse()
                    .map(ExpressionKind::FloatLiteral)
                    .map_err(|_| "valid floating-point literal")
            }

        // One or more digit groups, each optionally followed by `_` separators.
        rule digits() -> &'input str = $((digit_group() "_"*)+)
        rule bin_digits() -> &'input str = $((bin_digit_group() "_"*)+)
        rule hex_digits() -> &'input str = $((hex_digit_group() "_"*)+)

        rule digit_group() -> &'input str = $(['0'..='9']+)
        rule bin_digit_group() -> &'input str = $(['0' | '1']+)
        rule hex_digit_group() -> &'input str = $(['0'..='9' | 'a'..='f' | 'A'..='F']+)

    }
}
|
|
|
|
/// Converts the digits of a binary literal (without the `0b` prefix) into a `u64`.
///
/// `_` separators are skipped. Digits are accumulated most-significant first with
/// checked arithmetic, so every value up to `u64::MAX` is accepted regardless of how
/// many leading zeros the literal carries.
///
/// # Panics
///
/// Panics if the value does not fit in 64 bits, or if `digits` contains a character
/// other than `0`, `1` or `_`.
//TODO report overflow as a parse error (with the offending token) instead of panicking
fn parse_binary(digits: &str) -> u64 {
    digits
        .chars()
        .filter(|&c| c != '_')
        .fold(0u64, |acc, c| {
            let bit = match c {
                '0' => 0,
                '1' => 1,
                _ => unreachable!("binary literal may only contain '0', '1' and '_'"),
            };
            // Shift the accumulated value left by one bit and append the new bit.
            acc.checked_mul(2)
                .and_then(|shifted| shifted.checked_add(bit))
                .expect("binary literal does not fit in 64 bits")
        })
}
|
|
|
|
//TODO fix these two functions
|
|
/// Converts the digits of a hexadecimal literal (without the `0x` prefix) into a `u64`.
///
/// `_` separators are skipped and both upper- and lower-case digits are accepted.
/// Digits are accumulated most-significant first with checked arithmetic, so every
/// value up to `u64::MAX` is accepted regardless of leading zeros.
///
/// # Panics
///
/// Panics if the value does not fit in 64 bits, or if `digits` contains a character
/// that is neither a hexadecimal digit nor `_`.
fn parse_hex(digits: &str) -> u64 {
    digits
        .chars()
        .filter(|&c| c != '_')
        .fold(0u64, |acc, c| {
            let nibble = c
                .to_digit(16)
                .map(u64::from)
                .expect("hexadecimal literal may only contain hex digits and '_'");
            // Shift the accumulated value left by one hex digit and append the new one.
            acc.checked_mul(16)
                .and_then(|shifted| shifted.checked_add(nibble))
                .expect("hexadecimal literal does not fit in 64 bits")
        })
}
|
|
|
|
#[derive(Debug)]
|
|
struct BinopSequence {
|
|
first: ExpressionKind,
|
|
next: Vec<(BinOp, ExpressionKind)>,
|
|
}
|
|
|
|
impl BinopSequence {
|
|
fn do_precedence(self) -> ExpressionKind {
|
|
fn helper(
|
|
precedence: i32,
|
|
lhs: ExpressionKind,
|
|
rest: &mut Vec<(BinOp, ExpressionKind)>,
|
|
) -> Expression {
|
|
let mut lhs = Expression::new(Default::default(), lhs);
|
|
loop {
|
|
let (next_op, next_rhs) = match rest.pop() {
|
|
Some((a, b)) => (a, b),
|
|
None => break,
|
|
};
|
|
let new_precedence = next_op.get_precedence();
|
|
if precedence >= new_precedence {
|
|
rest.push((next_op, next_rhs));
|
|
break;
|
|
}
|
|
let rhs = helper(new_precedence, next_rhs, rest);
|
|
lhs = Expression::new(
|
|
Default::default(),
|
|
ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)),
|
|
);
|
|
}
|
|
lhs
|
|
}
|
|
let mut as_stack = self.next.into_iter().rev().collect();
|
|
helper(BinOp::min_precedence(), self.first, &mut as_stack).kind
|
|
}
|
|
}
|