From 219f5a183a774984a846e5b895017f581e62f1a0 Mon Sep 17 00:00:00 2001 From: Greg Shuflin Date: Fri, 19 Nov 2021 09:59:08 -0800 Subject: [PATCH] Various cleanup --- schala-lang/src/parsing/combinator.rs | 73 +++++++++++++++-------- schala-lang/src/parsing/test.rs | 83 ++++++++++++++++----------- 2 files changed, 96 insertions(+), 60 deletions(-) diff --git a/schala-lang/src/parsing/combinator.rs b/schala-lang/src/parsing/combinator.rs index 7045bc1..3db803d 100644 --- a/schala-lang/src/parsing/combinator.rs +++ b/schala-lang/src/parsing/combinator.rs @@ -7,11 +7,11 @@ use nom::{ complete::{alpha1, alphanumeric0, char, line_ending, none_of, not_line_ending, one_of, space1}, is_alphanumeric, }, - combinator::{cut, map, not, opt, peek, recognize, value}, + combinator::{cut, eof, map, not, opt, peek, recognize, value, verify}, error::{context, ParseError, VerboseError}, multi::{many0, many1, separated_list0, separated_list1}, - sequence::{delimited, pair, preceded, separated_pair, tuple}, - IResult, Parser, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, + Finish, IResult, Parser, }; use nom_locate::{position, LocatedSpan}; @@ -27,6 +27,38 @@ use crate::ast::*; fn rc_string(s: &str) -> Rc { Rc::new(s.to_string()) } + +fn is_keyword(input: &str) -> bool { + let keywords = [ + "if", + "then", + "else", + "is", + "fn", + "for", + "while", + "in", + "true", + "false", + "let", + "in", + "mut", + "return", + "break", + "continue", + "type", + "alias", + "self", + "Self", + "interface", + "impl", + "module", + "import", + ]; + + keywords.iter().any(|kw| kw == &input) +} + fn fresh_id(span: &Span) -> Id { let mut table_handle = span.extra.borrow_mut(); table_handle.fresh() @@ -750,9 +782,17 @@ fn identifier(input: Span) -> ParseResult { } fn identifier_span(input: Span) -> ParseResult { - recognize(pair(alt((tag("_"), alpha1)), take_while(|ch: char| is_alphanumeric(ch as u8) || ch == '_')))( - input, - ) + fn check(input: &Span) -> bool { + !is_keyword(input.fragment()) + } + + verify( + recognize(tuple(( + alt((tag("_"), alpha1)), + take_while(|ch: char| is_alphanumeric(ch as u8) || ch == '_'), + ))), + check, + )(input) } fn bool_literal(input: Span) -> ParseResult { @@ -897,21 +937,6 @@ mod test { use super::*; - fn rc(s: &str) -> Rc { - Rc::new(s.to_owned()) - } - macro_rules! qn { - ( $( $component:ident),* ) => { - { - let mut components = vec![]; - $( - components.push(rc(stringify!($component))); - )* - QualifiedName { components, id: Default::default() } - } - }; - } - macro_rules! span { ($func:expr, $input:expr) => {{ let id_store: IdStore = IdStore::new(); @@ -926,6 +951,8 @@ mod test { assert_eq!(span!(bin_literal, "0b1111qsdf"), Ok(("qsdf", 15))); assert_eq!(span!(bare_string_literal, r#""fah""#), Ok(("", "fah".to_string()))); assert_eq!(span!(bare_string_literal, r#""""#), Ok(("", "".to_string()))); + assert_eq!(*span!(identifier_span, "modulek").unwrap().1.fragment(), "modulek"); + assert!(span!(identifier_span, "module").is_err()); } #[test] @@ -946,9 +973,5 @@ mod test { span!(expression_kind(true), " /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true) ); - assert_eq!( - span!(expression_kind(true), " /*yolo*/ barnaby").unwrap().1, - ExpressionKind::Value(qn!(barnaby)) - ); } } diff --git a/schala-lang/src/parsing/test.rs b/schala-lang/src/parsing/test.rs index 522fe91..36a775b 100644 --- a/schala-lang/src/parsing/test.rs +++ b/schala-lang/src/parsing/test.rs @@ -85,6 +85,7 @@ fn ty_simple(name: &str) -> TypeIdentifier { TypeIdentifier::Singleton(TypeSingletonName { name: rc(name), params: vec![] }) } +/* macro_rules! assert_ast { ($input:expr, $statements:expr) => { let mut parser = Parser::new(); @@ -97,8 +98,9 @@ macro_rules! assert_ast { assert_eq!(ast.unwrap(), expected); }; } +*/ -macro_rules! assert_ast_comb { +macro_rules! assert_ast { ($input:expr, $statements:expr) => { let mut parser = Parser::new(); let ast = parser.parse_comb($input); @@ -119,6 +121,14 @@ macro_rules! assert_fail { }; } +macro_rules! assert_fail_comb { + ($input:expr, $failure:expr) => { + let mut parser = Parser::new(); + let err = parser.parse_comb($input).unwrap_err(); + assert_eq!(err.msg, $failure); + }; +} + /* macro_rules! assert_expr { ($input:expr, $correct:expr) => { @@ -195,7 +205,7 @@ fn binexps() { use StatementKind::Expression; assert_expr!("0xf_f+1", binop("+", expr(NatLiteral(255)), expr(NatLiteral(1)))); - assert_ast_comb!( + assert_ast!( "3; 4; 4.3", vec![ stmt(Expression(expr(NatLiteral(3)))), @@ -509,7 +519,7 @@ fn complex_lambdas() { use ExpressionKind::*; //TODO support this without the semicolon after the lambda - assert_ast_comb! { + assert_ast! { r#"fn wahoo() { let a = 10; \(x) { x + a }; } wahoo()(3) "#, vec![ @@ -553,7 +563,7 @@ fn type_annotations() { use ExpressionKind::*; use TypeIdentifier::*; - assert_ast_comb!( + assert_ast!( "let a = b : Int", vec![decl(Declaration::Binding { name: rc("a"), @@ -611,7 +621,7 @@ fn type_annotations() { #[test] fn type_declarations() { use Declaration::TypeDecl; - assert_ast_comb! { + assert_ast! { "type Alpha = Alpha", vec![ decl(TypeDecl { name: TypeSingletonName { name: rc("Alpha"), params: vec![] }, @@ -627,7 +637,7 @@ fn type_declarations() { ] }; - assert_ast_comb!( + assert_ast!( "type mut Kuah = Kuah", decl(TypeDecl { name: TypeSingletonName { name: rc("Kuah"), params: vec![] }, @@ -640,7 +650,7 @@ fn type_declarations() { }) ); - assert_ast_comb! { + assert_ast! { "type Alpha = Alpha { a: Int, b: Int }", vec![decl(TypeDecl { name: TypeSingletonName { name: rc("Alpha"), params: vec![] }, @@ -658,7 +668,7 @@ fn type_declarations() { })] }; - assert_ast_comb! { + assert_ast! { "type Alpha = { a: Int, b: Int }", vec![decl(TypeDecl { name: TypeSingletonName { name: rc("Alpha"), params: vec![] }, @@ -671,7 +681,7 @@ fn type_declarations() { })] }; - assert_ast_comb!( + assert_ast!( "type Option = None | Some(T)", vec![decl(TypeDecl { name: TypeSingletonName { @@ -693,12 +703,12 @@ fn type_declarations() { })] ); - assert_ast_comb!( + assert_ast!( "type alias Alpha = Beta", decl(Declaration::TypeAlias { alias: rc("Alpha"), original: rc("Beta") }) ); - assert_ast_comb!("type Complex = Unit | Record { field: AnotherType, field2: (Nat, Int), field3: T } | Tuple(Int, (String, T))", + assert_ast!("type Complex = Unit | Record { field: AnotherType, field2: (Nat, Int), field3: T } | Tuple(Int, (String, T))", decl(TypeDecl { name: TypeSingletonName { name: rc("Complex"), params: vec![ TypeIdentifier::Singleton(TypeSingletonName { name: rc("T"), params: vec![] }), @@ -735,7 +745,7 @@ fn type_declarations() { fn declarations() { use ExpressionKind::*; - assert_ast_comb!( + assert_ast!( "let q_q = Yolo::Swaggins", vec![decl(Declaration::Binding { name: rc("q_q"), @@ -750,7 +760,7 @@ fn declarations() { fn bindings() { use ExpressionKind::*; - assert_ast_comb!( + assert_ast!( "let mut a = 10", vec![decl(Declaration::Binding { name: rc("a"), @@ -760,7 +770,7 @@ fn bindings() { })] ); - assert_ast_comb!( + assert_ast!( "let a = 2 + a", vec![stmt(StatementKind::Declaration(Declaration::Binding { name: rc("a"), @@ -770,7 +780,7 @@ fn bindings() { }))] ); - assert_ast_comb!( + assert_ast!( "let a: Nat = 2", vec![stmt(StatementKind::Declaration(Declaration::Binding { name: rc("a"), @@ -784,7 +794,7 @@ fn bindings() { #[test] fn functions() { use ExpressionKind::*; - assert_ast_comb!( + assert_ast!( "fn oi()", vec![stmt(StatementKind::Declaration(Declaration::FuncSig(Signature { name: rc("oi"), @@ -794,7 +804,7 @@ fn functions() { })))] ); - assert_ast_comb!( + assert_ast!( "oi()", vec![stmt(StatementKind::Expression(expr(Call { f: bx(expr(Value(qn!(oi)))), arguments: vec![] })))] ); @@ -811,7 +821,7 @@ fn functions() { ); assert_fail!("a(b,,c)","error at 1:5: expected one of \"(\", \".\", \"0b\", \"0x\", \"[\", \"\\\"\", \"_\", \"false\", \"for\", \"if\", \"true\", \"while\", ['+' | '-' | '!'], ['0' ..= '9'], ['a' ..= 'z' | 'A' ..= 'Z' | '_'], r#\"\\\"#"); - assert_ast_comb!( + assert_ast!( "fn a(b, c: Int): Int", vec![stmt(StatementKind::Declaration(Declaration::FuncSig(Signature { name: rc("a"), @@ -836,7 +846,7 @@ fn functions() { }"#; - assert_ast_comb!( + assert_ast!( source, vec![fn_decl( Signature { name: rc("some_function"), operator: false, type_anno: None, params: vec![] }, @@ -876,7 +886,7 @@ fn functions_with_different_whitespace() { "#; for item in [a, b, c].iter() { - assert_ast_comb!( + assert_ast!( item, vec![fn_decl( Signature { @@ -899,7 +909,7 @@ fn functions_with_different_whitespace() { fn functions_with_default_args() { use ExpressionKind::*; - assert_ast_comb!( + assert_ast!( "fn func(x: Int, y: Int = 4) { }", vec![fn_decl( Signature { @@ -923,7 +933,7 @@ fn functions_with_default_args() { #[test] fn interface() { let glue = TypeIdentifier::Singleton(TypeSingletonName { name: rc("Glue"), params: vec![] }); - assert_ast_comb!( + assert_ast!( "interface Unglueable { fn unglue(a: Glue); fn mar(): Glue }", vec![decl(Declaration::Interface { name: rc("Unglueable"), @@ -955,13 +965,13 @@ fn impls() { ), ]; - assert_ast_comb!( + assert_ast!( "impl Heh { fn yolo() { }; fn swagg() { } }", vec![decl(Impl { type_name: ty_simple("Heh"), interface_name: None, block: block.clone() })] ); //TODO `"impl Heh { fn yolo() { }; fn swagg() { }; }"` ought to work - assert_ast_comb!( + assert_ast!( "impl Heh { fn yolo() { }; fn swagg() { } }", vec![decl(Impl { type_name: TypeIdentifier::Singleton(TypeSingletonName { @@ -973,7 +983,7 @@ fn impls() { })] ); - assert_ast_comb!( + assert_ast!( "impl Heh for Saraz { fn yolo() {}; fn swagg() {} }", vec![decl(Impl { type_name: ty_simple("Saraz"), @@ -982,7 +992,7 @@ fn impls() { })] ); - assert_ast_comb!( + assert_ast!( "impl Heh for (Int, Codepoint) {}", vec![decl(Impl { type_name: TypeIdentifier::Tuple(vec![ty_simple("Int"), ty_simple("Codepoint")]), @@ -1001,7 +1011,7 @@ fn annotations() { vec![].into(), )); - assert_ast_comb! { + assert_ast! { r#" @test_annotation fn some_function() { @@ -1015,7 +1025,7 @@ fn annotations() { ] }; - assert_ast_comb! { + assert_ast! { r#" @test_annotation(some,value) @another_annotation @@ -1035,7 +1045,7 @@ fn annotations() { #[test] fn modules() { - assert_ast_comb! { + assert_ast! { r#" module ephraim { let mut a = 10 @@ -1058,7 +1068,7 @@ fn modules() { #[test] fn imports() { - assert_ast_comb! { + assert_ast! { "import harbinger::draughts::Norgleheim", vec![stmt(StatementKind::Import(ImportSpecifier { id: ItemId::default(), @@ -1067,7 +1077,7 @@ fn imports() { }))] }; - assert_ast_comb! { + assert_ast! { "import harbinger::draughts::{Norgleheim, Xraksenlaigar}", vec![stmt(StatementKind::Import(ImportSpecifier { id: ItemId::default(), @@ -1078,7 +1088,7 @@ fn imports() { }; //TODO this shouldn't be legal - assert_ast_comb! { + assert_ast! { "import bespouri::{}", vec![stmt(StatementKind::Import(ImportSpecifier { id: Default::default(), @@ -1087,7 +1097,7 @@ fn imports() { }))] }; - assert_ast_comb! { + assert_ast! { "import bespouri::*", vec![stmt(StatementKind::Import(ImportSpecifier { id: Default::default(), @@ -1302,7 +1312,7 @@ fn flow_control() { return 10; }"#; - assert_ast_comb!( + assert_ast!( source, vec![fn_decl( Signature { name: rc("test"), operator: false, type_anno: None, params: vec![] }, @@ -1413,7 +1423,10 @@ fn comments() { assert_fail_expr!(source, binop("+", expr(NatLiteral(1)), expr(NatLiteral(2)))); let source = "5//no man\n"; - assert_ast_comb!(source, vec![exst(NatLiteral(5))]); + assert_ast!(source, vec![exst(NatLiteral(5))]); + + let source = " /*yolo*/ barnaby"; + assert_ast!(source, exst(ExpressionKind::Value(qn!(barnaby)))); } //TODO support backtick operators like this