diff --git a/Cargo.lock b/Cargo.lock
index 8c7cdeb..0626bc3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -397,9 +397,15 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
 
 [[package]]
 name = "memchr"
-version = "2.2.1"
+version = "2.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
+checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
 
 [[package]]
 name = "miniz-sys"
@@ -462,6 +468,17 @@ dependencies = [
  "version_check 0.1.5",
 ]
 
+[[package]]
+name = "nom"
+version = "7.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+ "version_check 0.9.3",
+]
+
 [[package]]
 name = "num"
 version = "0.1.42"
@@ -919,6 +936,7 @@ dependencies = [
  "ena",
  "failure",
  "itertools",
+ "nom 7.1.0",
  "peg",
  "pretty_assertions",
  "radix_trie",
@@ -1068,7 +1086,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8e51065bafd2abe106b6036483b69d1741f4a1ec56ce8a2378de341637de689e"
 dependencies = [
  "fnv",
- "nom",
+ "nom 4.2.3",
  "phf",
  "phf_codegen",
 ]
diff --git a/schala-lang/Cargo.toml b/schala-lang/Cargo.toml
index 7985d46..943d586 100644
--- a/schala-lang/Cargo.toml
+++ b/schala-lang/Cargo.toml
@@ -16,6 +16,7 @@ radix_trie = "0.1.5"
 assert_matches = "1.5"
 #peg = "0.7.0"
 peg = { git = "https://github.com/kevinmehall/rust-peg", rev = "960222580c8da25b17d32c2aae6f52f902728b62" }
+nom = "7.1.0"
 
 schala-repl = { path = "../schala-repl" }
 
diff --git a/schala-lang/src/parsing/combinator.rs b/schala-lang/src/parsing/combinator.rs
new file mode 100644
index 0000000..4c7a3f4
--- /dev/null
+++ b/schala-lang/src/parsing/combinator.rs
@@ -0,0 +1,301 @@
+//! Parser combinators built on `nom`.
+//!
+//! Covers whitespace and comment skipping, boolean literals and natural-number literals
+//! (decimal, `0x` hexadecimal and `0b` binary, with `_` allowed between digit groups).
+
+use nom::{
+    Err,
+    branch::alt,
+    bytes::complete::tag,
+    character::complete::{
+        anychar, char, line_ending, multispace0, not_line_ending, one_of, space0, space1,
+    },
+    combinator::{cut, map, map_res, not, peek, value},
+    error::{context, ParseError, VerboseError},
+    multi::{fold_many1, many0, many1, separated_list1},
+    sequence::{preceded, tuple},
+    IResult, Parser,
+};
+
+/// Result type shared by the parsers in this module: `&str` input, `VerboseError` errors.
+type ParseResult<'a, O> = IResult<&'a str, O, VerboseError<&'a str>>;
+
+use crate::ast::*;
+
+/*
+fn block(input: &str) -> ParseResult<Block> {
+    context("block",
+        map(
+            tuple((
+                char('{'),
+                value((), context("TOP", many0(alt((line_separator, ws))))),
+                block_items,
+                value((), many0(alt((line_separator, ws)))),
+                char('}'),
+            )), |(_, _, items, _, _)| items.into()))(input)
+}
+*/
+
+/// Wraps `input_parser` so that leading whitespace and comments are skipped first.
+fn tok<'a, O>(
+    input_parser: impl Parser<&'a str, O, VerboseError<&'a str>>,
+) -> impl FnMut(&'a str) -> IResult<&'a str, O, VerboseError<&'a str>> {
+    preceded(ws0, input_parser)
+}
+
+/// Consumes exactly one piece of whitespace: a run of spaces/tabs, a line comment or a
+/// block comment. Fails when none is present - use `ws0` when whitespace is optional.
+fn whitespace(input: &str) -> ParseResult<()> {
+    context("whitespace",
+        alt((
+            value((), space1),
+            line_comment,
+            block_comment,
+        )))(input)
+}
+
+/// Skips zero or more pieces of whitespace (see `whitespace`).
+fn ws0(input: &str) -> ParseResult<()> {
+    context("WS0",
+        value((), many0(whitespace)))(input)
+}
+
+/// Consumes a `//` comment up to, but not including, the line ending or end of input.
+fn line_comment(input: &str) -> ParseResult<()> {
+    // `many0(not(line_ending))` does not work here: `not` consumes nothing, and `many0`
+    // returns an error when its sub-parser succeeds without consuming input.
+    value((), preceded(tag("//"), not_line_ending))(input)
+}
+
+/// Consumes a `/* ... */` comment; comments may nest.
+fn block_comment(input: &str) -> ParseResult<()> {
+    value((),
+        tuple((
+            tag("/*"),
+            many0(alt((
+                block_comment,
+                // Any one character that does not begin the closing delimiter. `anychar`
+                // guarantees progress, which `many0` requires of its sub-parser.
+                value((), preceded(not(tag("*/")), anychar)),
+            ))),
+            // Once a comment has been opened, a missing `*/` is a hard failure instead of
+            // something for enclosing `alt`/`many0` combinators to backtrack over.
+            cut(tag("*/")),
+        )))(input)
+}
+
+/// Consumes a single statement separator: a line ending or `;`.
+fn line_separator(input: &str) -> ParseResult<()> {
+    alt((value((), line_ending), value((), char(';'))))(input)
+}
+
+/// Parses one or more statements separated by runs of line separators.
+fn block_items(input: &str) -> ParseResult<Vec<Statement>> {
+    context("Block-item",
+        separated_list1(
+            preceded(context("LLLL", ws0), many1(line_separator)),
+            statement,
+        ))(input)
+}
+
+/// Parses a statement, which for now is an expression with optional whitespace on either
+/// side. The `id` and `location` fields are filled in with their defaults.
+fn statement(input: &str) -> ParseResult<Statement> {
+    context("Parsing-statement",
+        map(
+            tuple((
+                ws0,
+                expression_kind,
+                ws0
+            )),|(_, kind, _)| Statement {
+                id: Default::default(),
+                location: Default::default(),
+                kind: StatementKind::Expression(Expression::new(Default::default(), kind)),
+            }))(input)
+}
+
+/// Parses an expression; only number and boolean literals are recognised so far.
+pub fn expression_kind(input: &str) -> ParseResult<ExpressionKind> {
+    context("expression-kind",
+        alt((
+            number_literal,
+            bool_literal,
+        )))(input)
+}
+
+/// Parses `true` or `false`.
+fn bool_literal(input: &str) -> ParseResult<ExpressionKind> {
+    alt((
+        map(tok(tag("true")), |_| ExpressionKind::BoolLiteral(true)),
+        map(tok(tag("false")), |_| ExpressionKind::BoolLiteral(false)),
+    ))(input)
+}
+
+/// Parses a natural-number literal. Hexadecimal and binary are tried before decimal;
+/// otherwise the leading `0` of a `0x`/`0b` prefix would be accepted as a decimal literal.
+fn number_literal(input: &str) -> ParseResult<ExpressionKind> {
+    map(alt((tok(hex_literal), tok(bin_literal), tok(dec_literal))), ExpressionKind::NatLiteral)(input)
+}
+
+/// Parses a decimal literal. A value that does not fit in `u64` is a parse error.
+fn dec_literal(input: &str) -> ParseResult<u64> {
+    map_res(digits(digit_group_dec), |chars: Vec<char>| {
+        chars.into_iter().collect::<String>().parse::<u64>()
+    })(input)
+}
+
+/// Parses a `0x`/`0X` hexadecimal literal. A value that does not fit in `u64` is a parse
+/// error.
+fn hex_literal(input: &str) -> ParseResult<u64> {
+    map_res(
+        preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)),
+        |chars: Vec<char>| parse_hex(&chars.into_iter().collect::<String>()),
+    )(input)
+}
+
+/// Parses a `0b`/`0B` binary literal. A value that does not fit in `u64` is a parse error.
+fn bin_literal(input: &str) -> ParseResult<u64> {
+    map_res(
+        preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)),
+        |chars: Vec<char>| parse_binary(&chars.into_iter().collect::<String>()),
+    )(input)
+}
+
+/// Parses one or more digit groups separated by runs of `_` and returns the digits with
+/// the separators removed.
+fn digits<'a, E: ParseError<&'a str>>(
+    digit_type: impl Parser<&'a str, Vec<char>, E>,
+) -> impl FnMut(&'a str) -> IResult<&'a str, Vec<char>, E> {
+    map(separated_list1(many1(char('_')), digit_type), |items: Vec<Vec<char>>| {
+        items.into_iter().flatten().collect()
+    })
+}
+
+/// One or more decimal digits.
+fn digit_group_dec(input: &str) -> ParseResult<Vec<char>> {
+    many1(one_of("0123456789"))(input)
+}
+
+/// One or more hexadecimal digits, in either case.
+fn digit_group_hex(input: &str) -> ParseResult<Vec<char>> {
+    many1(one_of("0123456789abcdefABCDEF"))(input)
+}
+
+/// One or more binary digits.
+fn digit_group_bin(input: &str) -> ParseResult<Vec<char>> {
+    many1(one_of("01"))(input)
+}
+
+/// Converts a string of binary digits, `_` separators allowed, into a `u64`.
+///
+/// Returns an error if the value does not fit in 64 bits, or if a character is neither a
+/// binary digit nor `_`.
+fn parse_binary(digits: &str) -> Result<u64, &'static str> {
+    // Accumulate most-significant digit first so that overflow is reported only when the
+    // value itself exceeds `u64::MAX`, not when a positional multiplier would.
+    digits.chars().filter(|&d| d != '_').try_fold(0u64, |acc, d| {
+        let bit = match d {
+            '0' => 0,
+            '1' => 1,
+            _ => return Err("Internal parser error: invalid binary digit"),
+        };
+        acc.checked_mul(2)
+            .and_then(|shifted| shifted.checked_add(bit))
+            .ok_or("Binary expression will overflow")
+    })
+}
+
+/// Converts a string of hexadecimal digits, `_` separators allowed, into a `u64`.
+///
+/// Returns an error if the value does not fit in 64 bits, or if a character is neither a
+/// hexadecimal digit nor `_`.
+fn parse_hex(digits: &str) -> Result<u64, &'static str> {
+    // Most-significant digit first, for the same reason as in `parse_binary`.
+    digits.chars().filter(|&d| d != '_').try_fold(0u64, |acc, d| {
+        let digit = match d.to_digit(16) {
+            Some(n) => u64::from(n),
+            None => return Err("Internal parser error: invalid hex digit"),
+        };
+        acc.checked_mul(16)
+            .and_then(|shifted| shifted.checked_add(digit))
+            .ok_or("Hexadecimal expression will overflow")
+    })
+}
+
+#[cfg(test)]
+mod test {
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+
+    #[test]
+    fn combinator_test1() {
+        assert_eq!(digits(digit_group_dec)("342").unwrap().1, vec!['3', '4', '2']);
+        assert_eq!(bin_literal("0b1111qsdf"), Ok(("qsdf", 15)));
+    }
+
+    #[test]
+    fn combinator_test2() {
+        for s in ["15", "0b1111", "1_5_", "0XF__", "0Xf"].iter() {
+            assert_eq!(expression_kind(s).unwrap().1, ExpressionKind::NatLiteral(15));
+        }
+    }
+
+    #[test]
+    fn comments_are_skipped_as_whitespace() {
+        assert_eq!(ws0("  // note\n1"), Ok(("\n1", ())));
+        assert_eq!(ws0("// at end of input"), Ok(("", ())));
+        assert_eq!(ws0("/* a /* nested */ b */ 1"), Ok(("1", ())));
+        assert!(ws0("/* unterminated").is_err());
+        assert_eq!(expression_kind("/* fifteen */ 0xf").unwrap().1, ExpressionKind::NatLiteral(15));
+    }
+
+    #[test]
+    fn literals_use_the_full_u64_range() {
+        assert_eq!(parse_binary(&"1".repeat(64)), Ok(u64::MAX));
+        assert_eq!(parse_hex("ffff_ffff_ffff_ffff"), Ok(u64::MAX));
+        assert_eq!(parse_hex("0_0000_0000_0000_00ff"), Ok(0xff));
+    }
+
+    #[test]
+    fn overflowing_literals_are_errors() {
+        assert!(parse_binary(&"1".repeat(65)).is_err());
+        assert!(parse_hex("1_0000_0000_0000_0000").is_err());
+        assert!(dec_literal("18446744073709551616").is_err());
+        assert!(hex_literal("0x10000000000000000").is_err());
+        assert!(bin_literal(&format!("0b{}", "1".repeat(65))).is_err());
+    }
+
+    /*
+    #[test]
+    fn combinator_test3() {
+        let source = r#"{
+
+        4_5
+        11; 0xf
+        }"#;
+        let parsed = block(source).map_err(|err| match err {
+            Err::Error(err) | Err::Failure(err) => nom::error::convert_error(source, err),
+            _ => panic!()
+        });
+        //let parsed = block(source);
+
+        if let Err(err) = parsed {
+            println!("{}", err);
+            panic!("parse error desu!");
+        }
+
+        assert_eq!(parsed.unwrap().1, vec![
+            Statement { id: Default::default(), location:
+                Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(),
+                ExpressionKind::NatLiteral(45))) },
+            Statement { id: Default::default(), location:
+                Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(),
+                ExpressionKind::NatLiteral(11))) },
+            Statement { id: Default::default(), location:
+                Default::default(), kind: StatementKind::Expression(Expression::new(Default::default(),
+                ExpressionKind::NatLiteral(15))) },
+        ].into());
+    }
+    */
+}
diff --git a/schala-lang/src/parsing/mod.rs b/schala-lang/src/parsing/mod.rs
index 6be3d9e..93d6c11 100644
--- a/schala-lang/src/parsing/mod.rs
+++ b/schala-lang/src/parsing/mod.rs
@@ -1,5 +1,6 @@
 #![allow(clippy::upper_case_acronyms)]
 
+mod combinator;
 mod peg_parser;
 mod test;
 