diff --git a/Cargo.lock b/Cargo.lock index 0626bc3..6e05333 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -110,6 +110,12 @@ dependencies = [ "constant_time_eq", ] +[[package]] +name = "bytecount" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e" + [[package]] name = "byteorder" version = "1.3.2" @@ -479,6 +485,17 @@ dependencies = [ "version_check 0.9.3", ] +[[package]] +name = "nom_locate" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37794436ca3029a3089e0b95d42da1f0b565ad271e4d3bb4bad0c7bb70b10605" +dependencies = [ + "bytecount", + "memchr", + "nom 7.1.0", +] + [[package]] name = "num" version = "0.1.42" @@ -937,6 +954,7 @@ dependencies = [ "failure", "itertools", "nom 7.1.0", + "nom_locate", "peg", "pretty_assertions", "radix_trie", diff --git a/schala-lang/Cargo.toml b/schala-lang/Cargo.toml index 943d586..0a9df4a 100644 --- a/schala-lang/Cargo.toml +++ b/schala-lang/Cargo.toml @@ -17,6 +17,7 @@ assert_matches = "1.5" #peg = "0.7.0" peg = { git = "https://github.com/kevinmehall/rust-peg", rev = "960222580c8da25b17d32c2aae6f52f902728b62" } nom = "7.1.0" +nom_locate = "4.0.0" schala-repl = { path = "../schala-repl" } diff --git a/schala-lang/src/parsing/combinator.rs b/schala-lang/src/parsing/combinator.rs index a37c6db..d695dfd 100644 --- a/schala-lang/src/parsing/combinator.rs +++ b/schala-lang/src/parsing/combinator.rs @@ -9,9 +9,11 @@ use nom::{ sequence::{pair, tuple, preceded}, IResult, Parser, }; - use std::rc::Rc; +use nom_locate::{position, LocatedSpan}; +use std::rc::Rc; -type ParseResult<'a, O> = IResult<&'a str, O, VerboseError<&'a str>>; +type Span<'a> = LocatedSpan<&'a str>; +type ParseResult<'a, O> = IResult, O, VerboseError>>; use crate::ast::*; @@ -19,22 +21,22 @@ fn rc_string(s: &str) -> Rc { Rc::new(s.to_string()) } -fn tok<'a, O>(input_parser: impl Parser<&'a str, O, VerboseError<&'a str>>) -> impl FnMut(&'a str) --> IResult<&'a str, O, VerboseError<&'a str>> { +fn tok<'a, O>(input_parser: impl Parser, O, VerboseError>>) -> impl FnMut(Span<'a>) +-> IResult, O, VerboseError>> { context("tok", map(tuple((ws0, input_parser)), |(_, output)| output)) } -fn kw<'a>(keyword_str: &'static str) -> impl FnMut(&'a str) -> ParseResult<()> { +fn kw<'a>(keyword_str: &'static str) -> impl FnMut(Span<'a>) -> ParseResult<()> { context("keyword", tok(value((), tag(keyword_str)))) } // whitespace does consume at least one piece of whitespace - use ws0 for maybe none -fn whitespace(input: &str) -> ParseResult<()> { +fn whitespace(input: Span) -> ParseResult<()> { context("whitespace", alt(( block_comment, @@ -43,18 +45,18 @@ fn whitespace(input: &str) -> ParseResult<()> { )))(input) } -fn ws0(input: &str) -> ParseResult<()> { +fn ws0(input: Span) -> ParseResult<()> { context("WS0", value((), many0(whitespace)))(input) } -fn line_comment(input: &str) -> ParseResult<()> { +fn line_comment(input: Span) -> ParseResult<()> { value((), tuple((tag("//"), not_line_ending)), )(input) } -fn block_comment(input: &str) -> ParseResult<()> { +fn block_comment(input: Span) -> ParseResult<()> { context("Block-comment", value((), tuple(( @@ -68,7 +70,7 @@ fn block_comment(input: &str) -> ParseResult<()> { ))))(input) } -fn statement_delimiter(input: &str) -> ParseResult<()> { +fn statement_delimiter(input: Span) -> ParseResult<()> { tok(alt(( value((), line_ending), value((), char(';')) @@ -76,7 +78,7 @@ fn statement_delimiter(input: &str) -> ParseResult<()> { )(input) } -fn block(input: &str) -> ParseResult { +fn block(input: Span) -> ParseResult { context("block", map( tuple(( @@ -88,7 +90,7 @@ fn block(input: &str) -> ParseResult { )), |(_, _, items, _, _)| items.into()))(input) } -fn statement(input: &str) -> ParseResult { +fn statement(input: Span) -> ParseResult { context("Parsing-statement", map(expression, |expr| Statement { id: Default::default(), @@ -97,17 +99,17 @@ fn statement(input: &str) -> ParseResult { }))(input) } -fn expression(input: &str) -> ParseResult { +fn expression(input: Span) -> ParseResult { map(pair(expression_kind, opt(type_anno)), |(kind, maybe_anno)| { Expression::new(Default::default(), kind) })(input) } -fn type_anno(input: &str) -> ParseResult { +fn type_anno(input: Span) -> ParseResult { preceded(kw(":"), type_identifier)(input) } -fn type_identifier(input: &str) -> ParseResult { +fn type_identifier(input: Span) -> ParseResult { /* alt(( tuple((kw("("), separated_list0(kw(","), type_identifier), kw(")"))), @@ -117,16 +119,15 @@ fn type_identifier(input: &str) -> ParseResult { unimplemented!() } -fn type_singleton_name(input: &str) -> ParseResult { +fn type_singleton_name(input: Span) -> ParseResult { unimplemented!() } -pub fn expression_kind(input: &str) -> ParseResult { +pub fn expression_kind(input: Span) -> ParseResult { context("expression-kind", primary_expr)(input) } -fn primary_expr(input: &str) -> ParseResult { - +fn primary_expr(input: Span) -> ParseResult { context("primary-expr", alt(( number_literal, @@ -136,19 +137,19 @@ fn primary_expr(input: &str) -> ParseResult { } -fn identifier_expr(input: &str) -> ParseResult { +fn identifier_expr(input: Span) -> ParseResult { context("identifier-expr", map(qualified_identifier, ExpressionKind::Value))(input) } -fn qualified_identifier(input: &str) -> ParseResult { +fn qualified_identifier(input: Span) -> ParseResult { tok( map( - separated_list1(tag("::"), map(identifier, rc_string)), + separated_list1(tag("::"), map(identifier, |x| rc_string(x.fragment()))), |items| QualifiedName { id: Default::default(), components: items } ))(input) } -fn identifier(input: &str) -> ParseResult<&str> { +fn identifier(input: Span) -> ParseResult { recognize( tuple(( alt((tag("_"), alpha1)), @@ -156,7 +157,7 @@ fn identifier(input: &str) -> ParseResult<&str> { )))(input) } -fn bool_literal(input: &str) -> ParseResult { +fn bool_literal(input: Span) -> ParseResult { context("bool-literal", alt(( map(kw("true"), |_| ExpressionKind::BoolLiteral(true)), @@ -164,48 +165,48 @@ fn bool_literal(input: &str) -> ParseResult { )))(input) } -fn number_literal(input: &str) -> ParseResult { +fn number_literal(input: Span) -> ParseResult { map(alt((tok(hex_literal), tok(bin_literal), tok(dec_literal))), ExpressionKind::NatLiteral)(input) } -fn dec_literal(input: &str) -> ParseResult { +fn dec_literal(input: Span) -> ParseResult { map(digits(digit_group_dec), |chars: Vec| { let s: String = chars.into_iter().collect(); s.parse().unwrap() })(input) } -fn hex_literal(input: &str) -> ParseResult { +fn hex_literal(input: Span) -> ParseResult { map(preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)), |chars: Vec| { let s: String = chars.into_iter().collect(); parse_hex(&s).unwrap() })(input) } -fn bin_literal(input: &str) -> ParseResult { +fn bin_literal(input: Span) -> ParseResult { map(preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)), |chars: Vec| { let s: String = chars.into_iter().collect(); parse_binary(&s).unwrap() })(input) } -fn digits<'a, E: ParseError<&'a str>>( - digit_type: impl Parser<&'a str, Vec, E>, -) -> impl FnMut(&'a str) -> IResult<&'a str, Vec, E> { +fn digits<'a, E: ParseError>>( + digit_type: impl Parser, Vec, E>, +) -> impl FnMut(Span<'a>) -> IResult, Vec, E> { map(separated_list1(many1(char('_')), digit_type), |items: Vec>| { items.into_iter().flatten().collect() }) } -fn digit_group_dec(input: &str) -> ParseResult> { +fn digit_group_dec(input: Span) -> ParseResult> { many1(one_of("0123456789"))(input) } -fn digit_group_hex(input: &str) -> ParseResult> { +fn digit_group_hex(input: Span) -> ParseResult> { many1(one_of("0123456789abcdefABCDEF"))(input) } -fn digit_group_bin(input: &str) -> ParseResult> { +fn digit_group_bin(input: Span) -> ParseResult> { many1(one_of("01"))(input) } @@ -252,57 +253,66 @@ mod test { use super::*; -fn rc(s: &str) -> Rc { - Rc::new(s.to_owned()) -} -macro_rules! qn { - ( $( $component:ident),* ) => { - { - let mut components = vec![]; - $( - components.push(rc(stringify!($component))); - )* - QualifiedName { components, id: Default::default() } + fn rc(s: &str) -> Rc { + Rc::new(s.to_owned()) + } + macro_rules! qn { + ( $( $component:ident),* ) => { + { + let mut components = vec![]; + $( + components.push(rc(stringify!($component))); + )* + QualifiedName { components, id: Default::default() } + } + }; + } + + macro_rules! span { + ($func:expr, $input:expr) => { + $func(Span::new($input)).map(|(span, x)| (*span.fragment(), x)) + }; } - }; -} #[test] fn combinator_test1() { - assert_eq!(digits(digit_group_dec)("342").unwrap().1, vec!['3', '4', '2']); - assert_eq!(bin_literal("0b1111qsdf"), Ok(("qsdf", 15))); + assert_eq!(span!(digits(digit_group_dec), "342"), Ok(("", vec!['3', '4', '2']))); + assert_eq!(span!(bin_literal, "0b1111qsdf"), Ok(("qsdf", 15))); } #[test] fn combinator_test_ws0() { - assert_eq!(block_comment("/*yolo*/").unwrap(), ("", ())); - assert_eq!(block_comment("/*yolo*/ jumpy /*nah*/").unwrap(), (" jumpy /*nah*/", ())); - assert_eq!(ws0("/* yolo */ ").unwrap(), ("", ())); - assert_eq!(ws0("/* /* no */ yolo */ ").unwrap(), ("", ())); + assert_eq!(span!(block_comment, "/*yolo*/"), Ok(("", ()))); + assert_eq!(span!(block_comment, "/*yolo*/ jumpy /*nah*/"), Ok((" jumpy /*nah*/", ()))); + assert_eq!(span!(ws0, "/* yolo */ "), Ok(("", ()))); + assert_eq!(span!(ws0, "/* /* no */ yolo */ "), Ok(("", ()))); } #[test] fn combinator_test2() { for s in [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"].iter() { - assert_eq!(expression_kind(s).unwrap().1, ExpressionKind::NatLiteral(15)); + assert_eq!(span!(expression_kind, s).unwrap().1, ExpressionKind::NatLiteral(15)); } - assert_eq!(expression_kind(" /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true)); - assert_eq!(expression_kind(" /*yolo*/ barnaby").unwrap().1, ExpressionKind::Value(qn!(barnaby))); + assert_eq!(span!(expression_kind, " /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true)); + assert_eq!(span!(expression_kind, " /*yolo*/ barnaby").unwrap().1, ExpressionKind::Value(qn!(barnaby))); } #[test] fn combinator_test3() { let source = "{}"; - assert_eq!(block(source).unwrap().1, vec![].into()); + assert_eq!(span!(block, source).unwrap().1, vec![].into()); let source = r#"{ //hella 4_5 //bog 11; /*chutney*/0xf }"#; - let parsed = block(source).map_err(|err| match err { - Err::Error(err) | Err::Failure(err) => nom::error::convert_error(source, err), + let parsed = span!(block, source).map_err(|err| match err { + Err::Error(err) | Err::Failure(err) => { + let err = VerboseError { errors: err.errors.into_iter().map(|(sp, kind)| (*sp.fragment(), kind)).collect() }; + nom::error::convert_error(source, err) + }, _ => panic!() });