From c6b0f7d7d1cb0f38b680a69811ca2ae667d59e40 Mon Sep 17 00:00:00 2001 From: greg Date: Thu, 13 Feb 2020 02:48:38 -0800 Subject: [PATCH] Pratt parsing --- schala-lang/language/src/ast/operators.rs | 4 ++ schala-lang/language/src/parser.rs | 61 ++++++++++++++++++----- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/schala-lang/language/src/ast/operators.rs b/schala-lang/language/src/ast/operators.rs index 0e3f2b0..78030a6 100644 --- a/schala-lang/language/src/ast/operators.rs +++ b/schala-lang/language/src/ast/operators.rs @@ -68,6 +68,10 @@ impl BinOp { let s = token_kind_to_sigil(op_tok)?; Some(binop_precedences(s)) } + + pub fn precedence(&self) -> i32 { + binop_precedences(self.sigil.as_str()) + } } fn token_kind_to_sigil<'a>(tok: &'a TokenKind) -> Option<&'a str> { diff --git a/schala-lang/language/src/parser.rs b/schala-lang/language/src/parser.rs index fef059d..5dac49b 100644 --- a/schala-lang/language/src/parser.rs +++ b/schala-lang/language/src/parser.rs @@ -4,28 +4,21 @@ use std::rc::Rc; use std::str::FromStr; use nom::IResult; +use nom::character::complete::{one_of}; use nom::bytes::complete::{tag, take_while}; -use nom::combinator::{map_res, value, opt}; +use nom::combinator::{map, map_res, value, opt}; use nom::multi::many1; -//use nom::error::{ParseError, ErrorKind}; +use nom::error::{ParseError, ErrorKind}; use nom::branch::alt; +use nom::sequence::delimited; use crate::ast::*; use crate::builtin::Builtin; -/* -#[derive(Debug, Clone)] -enum Prefix { - Plus, - Minus, - Bang, -} -*/ - const OPERATOR_CHARS: &'static str = "~`!@#$%^&*-+=<>?/|"; -fn parse_operator(input: &str) -> IResult<&str, BinOp> { +fn parse_binop(input: &str) -> IResult<&str, BinOp> { use nom::character::complete::one_of; let (rest, op): (_, Vec) = many1(one_of(OPERATOR_CHARS))(input)?; let sigil: String = op.into_iter().collect(); @@ -41,6 +34,22 @@ fn parse_bool_literal(input: &str) -> IResult<&str, ExpressionKind> { Ok((rest, ExpressionKind::BoolLiteral(value))) } +fn parse_number_literal(input: &str) -> IResult<&str, ExpressionKind> { + let num_lit = many1(alt(( + map(one_of("1234567890"), |s: char| Some(s)), + map(nom::character::complete::char('_'), |_| None) + ))); + + let (rest, n) = map_res(num_lit, + |digits: Vec>| { + let num_str: String = digits.into_iter().filter_map(|x| x).collect(); + u64::from_str_radix(&num_str, 10) + })(input)?; + + Ok((rest, ExpressionKind::NatLiteral(n))) +} + + fn parse_binary_literal(input: &str) -> IResult<&str, ExpressionKind> { let (rest, _) = tag("0b")(input)?; let (rest, n): (&str, u64) = map_res( @@ -72,6 +81,7 @@ fn parse_string_literal(input: &str) -> IResult<&str, ExpressionKind> { fn parse_literal(input: &str) -> IResult<&str, ExpressionKind> { alt(( parse_string_literal, + parse_number_literal, parse_hex_literal, parse_binary_literal, parse_bool_literal @@ -119,8 +129,33 @@ fn prefix_expr(input: &str) -> IResult<&str, ExpressionKind> { } } +// this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ +fn precedence_expr(input: &str, precedence: i32) -> IResult<&str, ExpressionKind> { + let (mut outer_rest, mut lhs) = prefix_expr(input)?; + loop { + let (rest, maybe_binop) = opt(parse_binop)(outer_rest)?; + println!("REST: {} | and maybe: {:?}", rest, maybe_binop); + let (new_precedence, binop) = match maybe_binop { + Some(binop) => (binop.precedence(), binop), + None => break, + }; + println!("Calculated new precedence: {} for binop: {:?}", new_precedence, binop); + + if precedence >= new_precedence { + break; + } + let (rest, rhs) = precedence_expr(rest, new_precedence)?; + outer_rest = rest; + lhs = ExpressionKind::BinExp(binop, + bx!(Expression::new(ItemId::new(0), lhs)), + bx!(Expression::new(ItemId::new(0), rhs)) + ); + } + Ok((outer_rest, lhs)) +} + fn expression_kind(input: &str) -> IResult<&str, ExpressionKind> { - prefix_expr(input) + precedence_expr(input, BinOp::min_precedence()) } pub fn perform_parsing(input: &str) -> Result {