schala/schala-lang/language/src/parsing/new.rs

304 lines
11 KiB
Rust
Raw Normal View History

use std::rc::Rc;
2021-11-04 21:11:19 -07:00
use crate::ast::*;
2021-11-03 23:57:22 -07:00
2021-11-03 16:27:42 -07:00
peg::parser! {
    // PEG grammar for Schala source text. Each rule either matches a fragment of the
    // input or builds the corresponding AST node in its `{ ... }` action block.
    pub grammar schala_parser() for str {

        // Zero or more spaces, tabs or newlines.
        rule whitespace() = [' ' | '\t' | '\n']*

        // Optional whitespace. Wrapped in `quiet!` so that whitespace is not listed
        // among the "expected ..." alternatives in parse errors.
        rule _ = quiet!{ whitespace() }

        // Entry point: a (possibly empty) list of statements separated by `delimiter()`.
        pub rule program() -> AST =
            n:(statement() ** delimiter() ) { AST { id: Default::default(), statements: n.into() } }

        // Statement separator.
        // NOTE(review): `_` also consumes '\n', and every expression ends by skipping
        // trailing whitespace, so the "\n" alternative here looks like it can never be
        // reached after a statement -- confirm whether newline-separated statements work.
        rule delimiter() = ";" / "\n"

        // At present the only statement form is a bare expression.
        rule statement() -> Statement =
            _ expr:expression() {
                Statement {
                    id: Default::default(),
                    location: Default::default(),
                    kind: StatementKind::Expression(expr),
                }
            }

        // A brace-enclosed list of statements.
        rule block() -> Block = "{" items:(statement() ** delimiter()) "}" { items.into() }

        // An expression with no type annotation; leading whitespace is skipped.
        pub rule expression() -> Expression =
            _ kind:expression_kind() { Expression { id: Default::default(), type_anno: None, kind } }

        rule expression_kind() -> ExpressionKind =
            precedence_expr()

        // Parses `operand (operator operand)*` as a flat sequence and lets
        // `BinopSequence::do_precedence` fold it into a tree of binary expressions.
        rule precedence_expr() -> ExpressionKind =
            first:prefix_expr() _ next:(precedence_continuation())* {
                let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect();
                BinopSequence { first, next }.do_precedence()
            }

        // One `operator operand` step of a binary-operator sequence.
        rule precedence_continuation() -> (&'input str, ExpressionKind) =
            op:operator() _ expr:primary() _ { (op, expr) }

        // An extended expression, optionally preceded by a single prefix sigil.
        rule prefix_expr() -> ExpressionKind =
            prefix:prefix()? expr:extended_expr() {
                if let Some(p) = prefix {
                    let expr = Expression::new(Default::default(), expr);
                    let prefix = PrefixOp::from_sigil(p);
                    ExpressionKind::PrefixExp(prefix, Box::new(expr))
                } else {
                    expr
                }
            }

        rule prefix() -> &'input str =
            $(['+' | '-' | '!' ])

        //TODO make the definition of operators more complex
        // One or more operator characters, returned as the matched slice. On failure the
        // error reports a single "operator" expectation instead of every character.
        rule operator() -> &'input str =
            quiet!{$( ['+' | '-' | '*' | '/' | '%' | '<' | '>' | '=' | '!' | '$' | '&' | '|' | '?' | '^' | '`']+ )} /
            expected!("operator")

        rule extended_expr() -> ExpressionKind =
            primary()

        // Atomic expressions. Alternatives are tried in order, so `float_literal` must
        // precede `nat_literal` (otherwise `1.5` would stop after `1`).
        rule primary() -> ExpressionKind =
            float_literal() / nat_literal() / bool_literal() / string_literal() / paren_expr() /
            list_expr() / if_expr() / identifier_expr()

        // `named_struct` is tried first so that `Name { ... }` is not cut short at the bare name.
        rule identifier_expr() -> ExpressionKind =
            named_struct() / qn:qualified_identifier() { ExpressionKind::Value(qn) }

        // A struct literal: a qualified name followed by a `{ field: expr, ... }` block.
        rule named_struct() -> ExpressionKind =
            name:qualified_identifier() _ fields:record_block() {
                ExpressionKind::NamedStruct {
                    name,
                    fields: fields.into_iter().map(|(n, exp)| (Rc::new(n.to_string()), exp)).collect(),
                }
            }

        //TODO anonymous structs, update syntax for structs
        rule record_block() -> Vec<(&'input str, Expression)> =
            "{" _ entries:(record_entry() ** ",") _ "}" { entries }

        rule record_entry() -> (&'input str, Expression) =
            _ name:identifier() _ ":" _ expr:expression() _ { (name, expr) }

        // One or more identifiers joined by `::`.
        rule qualified_identifier() -> QualifiedName =
            names:(identifier() ++ "::") {
                QualifiedName {
                    id: Default::default(),
                    components: names.into_iter().map(|name| Rc::new(name.to_string())).collect(),
                }
            }

        //TODO improve the definition of identifiers
        rule identifier() -> &'input str =
            $(['a'..='z' | 'A'..='Z' | '_'] ['a'..='z' | 'A'..='Z' | '0'..='9' | '_']*)

        // `if`, an optional discriminator expression, then one of the body forms.
        rule if_expr() -> ExpressionKind =
            "if" _ discriminator:(expression()?) _ body:if_expr_body() {
                ExpressionKind::IfExpression {
                    discriminator: discriminator.map(Box::new),
                    body: Box::new(body),
                }
            }

        rule if_expr_body() -> IfExpressionBody =
            cond_block() / simple_pattern_match() / simple_conditional()

        // `then <expr-or-block> [else <expr-or-block>]`
        rule simple_conditional() -> IfExpressionBody =
            "then" _ then_case:expr_or_block() _ else_case:else_case() {
                IfExpressionBody::SimpleConditional { then_case, else_case }
            }

        // `is <pattern> then <expr-or-block> [else <expr-or-block>]`
        rule simple_pattern_match() -> IfExpressionBody =
            "is" _ pattern:pattern() _ "then" _ then_case:expr_or_block() _ else_case:else_case() {
                IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case }
            }

        // `{ arm, arm, ... }` with at least one arm.
        rule cond_block() -> IfExpressionBody =
            "{" _ cond_arms:(cond_arm() ++ ",") _ "}" { IfExpressionBody::CondList(cond_arms) }

        // Either an `else` arm, or `<condition> [if <guard>] then <expr-or-block>`.
        rule cond_arm() -> ConditionArm =
            _ "else" _ body:expr_or_block() { ConditionArm { condition: Condition::Else, guard: None, body } } /
            _ condition:condition() _ guard:condition_guard() _ "then" _ body:expr_or_block()
                { ConditionArm { condition, guard, body } }

        // `is <pattern>`, or an operator followed by its right-hand expression.
        rule condition() -> Condition =
            "is" _ pat:pattern() { Condition::Pattern(pat) } /
            op:operator() _ expr:expression() { Condition::TruncatedOp(BinOp::from_sigil(op), expr) }

        rule condition_guard() -> Option<Expression> =
            ("if" _ expr:expression() { expr } )?

        // A block, or a single expression wrapped as a one-statement block.
        rule expr_or_block() -> Block = block() / ex:expression() {
            Statement {
                id: Default::default(), location: Default::default(),
                kind: StatementKind::Expression(ex)
            }.into()
        }

        rule else_case() -> Option<Block> =
            ("else" _ eorb:expr_or_block() { eorb })?

        // A parenthesised tuple of patterns, or a simple pattern.
        rule pattern() -> Pattern =
            "(" _ variants:(pattern() ++ ",") _ ")" { Pattern::TuplePattern(variants) } /
            _ pat:simple_pattern() { pat }

        // Literal, tuple-struct, record, or bare-name pattern, tried in that order.
        rule simple_pattern() -> Pattern =
            pattern_literal() /
            qn:qualified_identifier() "(" members:(pattern() ** ",") ")" {
                Pattern::TupleStruct(qn, members)
            } /
            qn:qualified_identifier() _ "{" _ items:(record_pattern_entry() ** ",") "}" _ {
                let items = items.into_iter().map(|(name, pat)| (Rc::new(name.to_string()), pat)).collect();
                Pattern::Record(qn, items)
            } /
            qn:qualified_identifier() { Pattern::VarOrName(qn) }

        // `name: pattern`, or the shorthand `name`, which binds the field to a
        // `VarOrName` pattern of the same name.
        rule record_pattern_entry() -> (&'input str, Pattern) =
            _ name:identifier() _ ":" _ pat:pattern() _ { (name, pat) } /
            _ name:identifier() _ {
                let qn = QualifiedName {
                    id: Default::default(),
                    components: vec![Rc::new(name.to_string())],
                };
                (name, Pattern::VarOrName(qn))
            }

        // `true`, `false`, a string, an optionally negated natural number, or `_`.
        rule pattern_literal() -> Pattern =
            "true" { Pattern::Literal(PatternLiteral::BoolPattern(true)) } /
            "false" { Pattern::Literal(PatternLiteral::BoolPattern(false)) } /
            s:bare_string_literal() { Pattern::Literal(PatternLiteral::StringPattern(Rc::new(s.to_string()))) } /
            sign:("-"?) num:nat_literal() {
                let neg = sign.is_some();
                Pattern::Literal(PatternLiteral::NumPattern { neg, num })
            } /
            "_" { Pattern::Ignored }

        // `[a, b, ...]`
        rule list_expr() -> ExpressionKind =
            "[" exprs:(expression() ** ",") "]" {
                ExpressionKind::ListLiteral(exprs)
            }

        // `(e)` yields the inner expression itself; any other number of elements
        // (including zero) yields a tuple literal.
        rule paren_expr() -> ExpressionKind =
            "(" exprs:(expression() ** ",") ")" {
                let mut exprs = exprs;
                match exprs.len() {
                    1 => exprs.pop().unwrap().kind,
                    _ => ExpressionKind::TupleLiteral(exprs),
                }
            }

        rule string_literal() -> ExpressionKind =
            s:bare_string_literal(){ ExpressionKind::StringLiteral(Rc::new(s.to_string())) }

        //TODO string escapes, prefixes
        // The text between two double quotes; the quotes themselves are not included.
        rule bare_string_literal() -> &'input str =
            "\"" items:$([^ '"' ]*) "\"" { items }

        rule bool_literal() -> ExpressionKind =
            "true" { ExpressionKind::BoolLiteral(true) } / "false" { ExpressionKind::BoolLiteral(false) }

        // Prefixed forms are tried first so that the leading `0` of `0b`/`0x` is not
        // taken as a complete decimal literal.
        rule nat_literal() -> ExpressionKind =
            bin_literal() / hex_literal() / unmarked_literal()

        // Decimal literal. `digits()` admits `_` separators, which `str::parse` rejects,
        // so they are stripped first. A value that still fails to parse (out of range)
        // is reported as a parse failure instead of panicking.
        rule unmarked_literal() -> ExpressionKind =
            digits:digits() {?
                digits.replace('_', "")
                    .parse()
                    .map(ExpressionKind::NatLiteral)
                    .map_err(|_| "natural number literal in range")
            }

        rule bin_literal() -> ExpressionKind =
            "0b" digits:bin_digits() { ExpressionKind::NatLiteral(parse_binary(digits)) }

        rule hex_literal() -> ExpressionKind =
            "0x" digits:hex_digits() { ExpressionKind::NatLiteral(parse_hex(digits)) }

        // `1.5`, `1.` or `.5`. As with decimal literals, `_` separators are stripped
        // before handing the text to `str::parse`, and a failure is a parse error
        // rather than a panic.
        rule float_literal() -> ExpressionKind =
            ds:$( digits() "." digits()? / "." digits() ) {?
                ds.replace('_', "")
                    .parse()
                    .map(ExpressionKind::FloatLiteral)
                    .map_err(|_| "valid float literal")
            }

        // Runs of digits, each optionally followed by `_` separators; returned verbatim
        // (separators included).
        rule digits() -> &'input str = $((digit_group() "_"*)+)
        rule bin_digits() -> &'input str = $((bin_digit_group() "_"*)+)
        rule hex_digits() -> &'input str = $((hex_digit_group() "_"*)+)

        rule digit_group() -> &'input str = $(['0'..='9']+)
        rule bin_digit_group() -> &'input str = $(['0' | '1']+)
        rule hex_digit_group() -> &'input str = $(['0'..='9' | 'a'..='f' | 'A'..='F']+)
    }
}
2021-11-03 18:01:23 -07:00
2021-11-04 21:11:19 -07:00
/// Converts the digit text of a binary literal (without the `0b` prefix) to a `u64`.
///
/// `_` separators are skipped. Digits are folded most-significant first with checked
/// arithmetic, so every value up to `u64::MAX` is accepted regardless of how many
/// leading zeros or separators the text contains. An empty string yields `0`.
///
/// # Panics
///
/// Panics if the value does not fit in a `u64`, or if `digits` contains a character
/// other than `'0'`, `'1'` or `'_'`.
//TODO report overflow as a parse error instead of panicking
fn parse_binary(digits: &str) -> u64 {
    digits
        .chars()
        .filter(|&c| c != '_')
        .fold(0u64, |acc, c| {
            let bit = match c {
                '0' => 0,
                '1' => 1,
                _ => unreachable!("binary literal may only contain '0', '1' and '_'"),
            };
            acc.checked_mul(2)
                .and_then(|shifted| shifted.checked_add(bit))
                .expect("binary literal does not fit in a u64")
        })
}
//TODO fix these two functions
/// Converts the digit text of a hexadecimal literal (without the `0x` prefix) to a `u64`.
///
/// `_` separators are skipped and both upper- and lower-case digits are accepted.
/// Digits are folded most-significant first with checked arithmetic, so every value
/// up to `u64::MAX` (all 16-digit literals included) is accepted. An empty string
/// yields `0`.
///
/// # Panics
///
/// Panics if the value does not fit in a `u64`, or if `digits` contains a character
/// that is neither a hexadecimal digit nor `'_'`.
fn parse_hex(digits: &str) -> u64 {
    digits
        .chars()
        .filter(|&c| c != '_')
        .fold(0u64, |acc, c| {
            let nibble = c
                .to_digit(16)
                .expect("hex literal may only contain hexadecimal digits and '_'");
            acc.checked_mul(16)
                .and_then(|shifted| shifted.checked_add(u64::from(nibble)))
                .expect("hex literal does not fit in a u64")
        })
}
2021-11-04 21:11:19 -07:00
/// A binary-operator expression in the flat form the grammar produces it: one leading
/// operand followed by `(operator, operand)` pairs, before any precedence is applied.
#[derive(Debug)]
struct BinopSequence {
    /// The leftmost operand.
    first: ExpressionKind,
    /// Every following operator paired with the operand to its right.
    next: Vec<(BinOp, ExpressionKind)>,
}
2021-11-04 21:11:19 -07:00
impl BinopSequence {
fn do_precedence(self) -> ExpressionKind {
fn helper(
precedence: i32,
lhs: ExpressionKind,
rest: &mut Vec<(BinOp, ExpressionKind)>,
) -> Expression {
let mut lhs = Expression::new(Default::default(), lhs);
loop {
let (next_op, next_rhs) = match rest.pop() {
Some((a, b)) => (a, b),
None => break,
};
let new_precedence = next_op.get_precedence();
if precedence >= new_precedence {
rest.push((next_op, next_rhs));
break;
2021-11-03 18:01:23 -07:00
}
2021-11-04 21:11:19 -07:00
let rhs = helper(new_precedence, next_rhs, rest);
lhs = Expression::new(
Default::default(),
ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)),
);
}
lhs
}
2021-11-04 21:11:19 -07:00
let mut as_stack = self.next.into_iter().rev().collect();
helper(BinOp::min_precedence(), self.first, &mut as_stack).kind
2021-11-03 18:01:23 -07:00
}
}