use std::rc::Rc; use nom::{ branch::alt, bytes::complete::{escaped_transform, tag, take_while}, character::{ complete::{alpha1, char, line_ending, none_of, not_line_ending, one_of, space1}, is_alphanumeric, }, combinator::{cut, eof, map, not, opt, peek, recognize, value, verify}, error::{context, ErrorKind, ParseError, VerboseError}, multi::{many0, many1, separated_list0, separated_list1}, sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, IResult, InputIter, InputLength, InputTake, Parser, Slice, }; use nom_locate::{position, LocatedSpan}; use crate::{ identifier::{Id, IdStore}, parsing::StoreRef, }; pub type Span<'a> = LocatedSpan<&'a str, StoreRef>; pub type ParseResult<'a, O> = IResult, O, VerboseError>>; use crate::ast::*; fn rc_string(s: &str) -> Rc { Rc::new(s.to_string()) } fn is_keyword(input: &str) -> bool { let keywords = [ "if", "then", "else", "is", "fn", "for", "while", "in", "true", "false", "let", "in", "mut", "return", "break", "continue", "type", "alias", "self", "Self", "interface", "impl", "module", "import", ]; keywords.iter().any(|kw| kw == &input) } fn fresh_id(span: &Span) -> Id { let mut table_handle = span.extra.borrow_mut(); table_handle.fresh() } fn tok<'a, O>( input_parser: impl Parser, O, VerboseError>>, ) -> impl FnMut(Span<'a>) -> IResult, O, VerboseError>> { context("token", preceded(ws0, input_parser)) } fn toknl<'a, O>( input_parser: impl Parser, O, VerboseError>>, ) -> impl FnMut(Span<'a>) -> IResult, O, VerboseError>> { context("token/newline", preceded(pair(many0(tok(statement_delimiter)), ws0), input_parser)) } fn kw<'a>(keyword_str: &'static str) -> impl FnMut(Span<'a>) -> ParseResult<()> { context("keyword", tok(value((), tag(keyword_str)))) } // whitespace does consume at least one piece of whitespace - use ws0 for maybe none fn whitespace(input: Span) -> ParseResult<()> { context("whitespace", alt((preceded(peek(tag("/*")), block_comment), line_comment, value((), space1))))( input, ) } fn ws0(input: Span) -> ParseResult<()> { context("WS0", value((), many0(whitespace)))(input) } fn line_comment(input: Span) -> ParseResult<()> { value((), tuple((tag("//"), not_line_ending)))(input) } fn block_comment(input: Span) -> ParseResult<()> { fn inner_parser(mut input: Span) -> ParseResult<()> { loop { let mut iter = input.iter_indices(); while let Some((idx, _ch)) = iter.next() { if idx + 2 > input.input_len() { return Err(nom::Err::Failure(VerboseError::from_error_kind(input, ErrorKind::Verify))); } if input.slice(idx..idx + 2).fragment() == &"/*" { let (rest, _seen) = input.take_split(idx); let (rest, ()) = block_comment(rest)?; input = rest; break; } if input.slice(idx..idx + 2).fragment() == &"*/" { let (rest, _seen) = input.take_split(idx); return Ok((rest, ())); } } } } context("block-comment", value((), tuple((tag("/*"), inner_parser, tag("*/")))))(input) } fn statement_delimiter(input: Span) -> ParseResult<()> { context("statement-delimiter", tok(alt((value((), line_ending), value((), char(';'))))))(input) } pub fn program(input: Span) -> ParseResult { let id = fresh_id(&input); let (rest, statements) = context( "AST", terminated( map( delimited( many0(statement_delimiter), separated_list0(many1(statement_delimiter), statement), many0(statement_delimiter), ), |items| items.into(), ), tok(eof), ), )(input)?; let ast = AST { id, statements }; Ok((rest, ast)) } fn block_template<'a, O, O2>( delimiter: impl Parser, O2, VerboseError>>, input_parser: impl Parser, O, VerboseError>>, ) -> impl FnMut(Span<'a>) -> IResult, Vec, VerboseError>> { delimited( pair(tok(char('{')), many0(statement_delimiter)), cut(separated_list0(delimiter, input_parser)), pair(many0(statement_delimiter), tok(char('}'))), ) } pub fn block(input: Span) -> ParseResult { context("block", map(block_template(many1(statement_delimiter), statement), |items| items.into()))(input) } fn statement(input: Span) -> ParseResult { let (input, pos) = position(input)?; let location = pos.location_offset().into(); let id = fresh_id(&input); let (rest, kind) = context( "Parsing-statement", alt(( map(flow, StatementKind::Flow), map(import, StatementKind::Import), map(declaration, StatementKind::Declaration), map(expression, StatementKind::Expression), )), )(input)?; Ok((rest, Statement { id, location, kind })) } fn import(input: Span) -> ParseResult { fn path_components(input: Span) -> ParseResult>> { map( tuple((opt(tag("::")), identifier, many0(preceded(tag("::"), identifier_span)))), |(_maybe_root, first, rest)| { let mut components = vec![rc_string(first.fragment())]; components.extend(rest.into_iter().map(|n| rc_string(n.fragment()))); components }, )(input) } fn import_suffix(input: Span) -> ParseResult { alt(( value(ImportedNames::All, tag("::*")), map( preceded( tag("::"), delimited(char('{'), cut(separated_list1(tok(char(',')), identifier)), char('}')), ), |names| ImportedNames::List(names.into_iter().map(|n| rc_string(n.fragment())).collect()), ), ))(input) } let id = fresh_id(&input); map( preceded(kw("import"), cut(pair(path_components, opt(import_suffix)))), move |(path_components, suffix)| ImportSpecifier { id, path_components, imported_names: suffix.unwrap_or(ImportedNames::LastOfPath), }, )(input) } fn flow(input: Span) -> ParseResult { alt(( map(kw("continue"), |_| FlowControl::Continue), map(kw("break"), |_| FlowControl::Break), map(preceded(kw("return"), opt(expression)), FlowControl::Return), ))(input) } fn declaration(input: Span) -> ParseResult { alt((binding, type_decl, func, annotation, module, interface, implementation))(input) } fn implementation(input: Span) -> ParseResult { alt(( map( preceded(kw("impl"), tuple((type_singleton_name, kw("for"), type_identifier, decl_block))), |(if_name, _, type_name, block)| Declaration::Impl { type_name, interface_name: Some(if_name), block, }, ), map(preceded(kw("impl"), pair(type_identifier, decl_block)), |(type_name, block)| { Declaration::Impl { type_name, interface_name: None, block } }), ))(input) } fn decl_block(input: Span) -> ParseResult> { block_template(many1(statement_delimiter), func_decl)(input) } fn interface(input: Span) -> ParseResult { map(preceded(kw("interface"), pair(identifier, signature_block)), |(name, signatures)| { Declaration::Interface { name: rc_string(name.fragment()), signatures } })(input) } fn signature_block(input: Span) -> ParseResult> { block_template(many1(statement_delimiter), func_signature)(input) } fn annotation(input: Span) -> ParseResult { map( tuple(( tok(char('@')), identifier, opt(delimited(tok(char('(')), separated_list1(tok(char(',')), expression), tok(char(')')))), statement_delimiter, statement, )), |(_, name, args, _, inner)| Declaration::Annotation { name: rc_string(name.fragment()), arguments: if let Some(args) = args { args } else { vec![] }, inner: Box::new(inner), }, )(input) } fn func(input: Span) -> ParseResult { alt((func_decl, map(func_signature, Declaration::FuncSig)))(input) } fn func_decl(input: Span) -> ParseResult { map(pair(func_signature, block), |(sig, decl)| Declaration::FuncDecl(sig, decl))(input) } fn func_signature(input: Span) -> ParseResult { let normal_fn = context("ordinary-fn", tuple((identifier, formal_params, opt(type_anno)))); let operator_fn = context( "operator-fn", tuple((delimited(tok(char('(')), operator, tok(char(')'))), formal_params, opt(type_anno))), ); context( "func-signature", preceded( kw("fn"), cut(alt(( map(normal_fn, |(name, params, type_anno)| Signature { name: rc_string(name.fragment()), operator: false, params, type_anno, }), map(operator_fn, |(op, params, type_anno)| Signature { name: rc_string(op.sigil()), operator: true, params, type_anno, }), ))), ), )(input) } fn formal_params(input: Span) -> ParseResult> { context( "formal-params", verify( delimited(tok(char('(')), separated_list0(tok(char(',')), formal_param), tok(char(')'))), |params: &Vec<_>| params.len() < 256, ), )(input) } fn formal_param(input: Span) -> ParseResult { map( tuple((identifier, opt(type_anno), opt(preceded(tok(char('=')), expression)))), |(name, anno, default)| FormalParam { name: rc_string(name.fragment()), anno, default }, )(input) } fn type_decl(input: Span) -> ParseResult { context( "type-decl", alt(( map( tuple((kw("type"), kw("alias"), identifier, tok(char('=')), identifier)), |(_, _, alias, _, name)| Declaration::TypeAlias { alias: rc_string(alias.fragment()), original: rc_string(name.fragment()), }, ), map( tuple((kw("type"), opt(kw("mut")), type_singleton_name, tok(char('=')), type_body)), |(_, mutable, name, _, body)| Declaration::TypeDecl { name, body, mutable: mutable.is_some(), }, ), )), )(input) } fn type_body(input: Span) -> ParseResult { let id = fresh_id(&input); context( "type-body", alt(( map(record_variant, move |fields| TypeBody::ImmediateRecord { id, fields }), map(separated_list0(tok(char('|')), variant_spec), TypeBody::Variants), )), )(input) } fn record_variant(input: Span) -> ParseResult, TypeIdentifier)>> { context( "record-variant", delimited( pair(tok(char('{')), many0(statement_delimiter)), terminated(separated_list1(toknl(char(',')), toknl(record_variant_item)), opt(toknl(char(',')))), pair(many0(statement_delimiter), tok(char('}'))), ), )(input) } fn variant_spec(input: Span) -> ParseResult { fn tuple_variant(input: Span) -> ParseResult { map( delimited(tok(char('(')), separated_list1(tok(char(',')), type_identifier), tok(char(')'))), VariantKind::TupleStruct, )(input) } let id = fresh_id(&input); let (rest, (name, kind)) = alt(( pair(identifier, map(record_variant, VariantKind::Record)), pair(identifier, tuple_variant), map(identifier, |ident| (ident, VariantKind::UnitStruct)), ))(input)?; Ok((rest, Variant { id, name: rc_string(name.fragment()), kind })) } fn record_variant_item(input: Span) -> ParseResult<(Rc, TypeIdentifier)> { context( "record-variant-item", map(tuple((identifier, tok(char(':')), type_identifier)), |(name, _, ty)| { (rc_string(name.fragment()), ty) }), )(input) } fn binding(input: Span) -> ParseResult { let parser = tuple((kw("let"), opt(kw("mut")), identifier, opt(type_anno), tok(char('=')), expression)); map(parser, |(_, maybe_mut, ident, type_anno, _, expr)| Declaration::Binding { name: rc_string(ident.fragment()), constant: maybe_mut.is_none(), type_anno, expr, })(input) } fn module(input: Span) -> ParseResult { map(tuple((kw("module"), identifier, block)), |(_, name, items)| Declaration::Module { name: rc_string(name.fragment()), items, })(input) } pub fn expression(input: Span) -> ParseResult { let id = fresh_id(&input); map(pair(expression_kind(true), opt(type_anno)), move |(kind, type_anno)| Expression { id, type_anno, kind, })(input) } fn expression_no_struct(input: Span) -> ParseResult { let id = fresh_id(&input); map(pair(expression_kind(false), opt(type_anno)), move |(kind, type_anno)| Expression { id, type_anno, kind, })(input) } fn expr_or_block(input: Span) -> ParseResult { let (input, pos) = position(input)?; let id = fresh_id(&input); let location = pos.location_offset().into(); alt(( block, map(expression, move |expr| Statement { id, location, kind: StatementKind::Expression(expr) }.into()), ))(input) } fn type_anno(input: Span) -> ParseResult { preceded(tok(char(':')), type_identifier)(input) } fn type_identifier(input: Span) -> ParseResult { alt(( map( delimited(tok(char('(')), separated_list0(tok(char(',')), type_identifier), tok(char(')'))), TypeIdentifier::Tuple, ), map(type_singleton_name, TypeIdentifier::Singleton), ))(input) } fn type_singleton_name(input: Span) -> ParseResult { map(pair(identifier, opt(type_params)), |(name, params)| TypeSingletonName { name: rc_string(name.fragment()), params: if let Some(params) = params { params } else { vec![] }, })(input) } fn type_params(input: Span) -> ParseResult> { delimited(tok(char('<')), separated_list1(tok(char(',')), type_identifier), tok(char('>')))(input) } pub fn expression_kind(allow_struct: bool) -> impl FnMut(Span) -> ParseResult { move |input: Span| context("expression-kind", precedence_expr(allow_struct))(input) } fn precedence_expr(allow_struct: bool) -> impl FnMut(Span) -> ParseResult { move |input: Span| { let handle = input.extra.clone(); let precedence_continuation = pair(operator, prefix_expr(allow_struct)); map( pair(prefix_expr(allow_struct), many0(precedence_continuation)), move |(first, rest): (ExpressionKind, Vec<(BinOp, ExpressionKind)>)| { let mut handle_ref = handle.borrow_mut(); BinopSequence { first, rest }.do_precedence(&mut handle_ref) }, )(input) } } fn operator(input: Span) -> ParseResult { context( "operator", tok(map( preceded(cut(not(tag("*/"))), recognize(many1(one_of("+-*/%<>=!$&|?^`")))), |sigil_span: Span| BinOp::from_sigil(sigil_span.fragment()), )), )(input) } fn prefix_op(input: Span) -> ParseResult { tok(map(recognize(one_of("+-!")), |sigil: Span| PrefixOp::from_sigil(sigil.fragment())))(input) } fn prefix_expr(allow_struct: bool) -> impl FnMut(Span) -> ParseResult { move |input: Span| { let id = fresh_id(&input); context( "prefix-expr", map(pair(opt(prefix_op), extended_expr(allow_struct)), move |(prefix, expr)| { if let Some(prefix) = prefix { let expr = Expression::new(id, expr); ExpressionKind::PrefixExp(prefix, Box::new(expr)) } else { expr } }), )(input) } } #[derive(Debug)] enum ExtendedPart<'a> { Index(Vec), Call(Vec), Accessor(&'a str), } fn extended_expr(allow_struct: bool) -> impl FnMut(Span) -> ParseResult { move |input: Span| { let (s, (primary, parts)) = context("extended-expr", pair(primary_expr(allow_struct), many0(extended_expr_part)))(input)?; let mut expression = Expression::new(fresh_id(&s), primary); for part in parts.into_iter() { let kind = match part { ExtendedPart::Index(indexers) => ExpressionKind::Index { indexee: Box::new(expression), indexers }, ExtendedPart::Call(arguments) => ExpressionKind::Call { f: Box::new(expression), arguments }, ExtendedPart::Accessor(name) => { let name = rc_string(name); ExpressionKind::Access { name, expr: Box::new(expression) } } }; expression = Expression::new(fresh_id(&s), kind); } Ok((s, expression.kind)) } } fn extended_expr_part(input: Span) -> ParseResult { fn index_part(input: Span) -> ParseResult> { delimited(tok(char('[')), cut(separated_list1(tok(char(',')), expression)), tok(char(']')))(input) } fn call_part(input: Span) -> ParseResult> { context( "call-part", verify( //TODO generalize this `not` delimited( tok(char('(')), separated_list0(tok(char(',')), preceded(not(tok(char(')'))), invocation_argument)), tok(char(')')), ), |output: &Vec<_>| output.len() <= 255, ), )(input) } fn access_part(input: Span) -> ParseResult<&str> { preceded(tok(char('.')), map(identifier, |item| *item.fragment()))(input) } alt(( map(index_part, ExtendedPart::Index), map(call_part, ExtendedPart::Call), map(access_part, ExtendedPart::Accessor), ))(input) } fn invocation_argument(input: Span) -> ParseResult { context( "invocation-argument", cut(alt(( map(tok(char('_')), |_| InvocationArgument::Ignored), map(tuple((identifier, tok(char('=')), expression)), |(name, _, expr)| { InvocationArgument::Keyword { name: rc_string(name.fragment()), expr } }), map(expression, InvocationArgument::Positional), ))), )(input) } fn primary_expr(allow_struct: bool) -> impl FnMut(Span) -> ParseResult { move |input: Span| { if allow_struct { context("primary-expr", alt((primary_expr_no_struct, named_struct, identifier_expr)))(input) } else { context("primary-expr", alt((primary_expr_no_struct, identifier_expr)))(input) } } } fn primary_expr_no_struct(input: Span) -> ParseResult { context( "primary-expr-no-struct", alt(( while_expr, for_expr, if_expr, lambda_expr, list_expr, paren_expr, bool_literal, float_literal, number_literal, string_literal, )), )(input) } fn named_struct(input: Span) -> ParseResult { context( "named-struct", map(pair(qualified_identifier, record_block), |(name, fields)| ExpressionKind::NamedStruct { name, fields, }), )(input) } //TODO support anonymous structs and Elm-style update syntax for structs fn record_block(input: Span) -> ParseResult, Expression)>> { let record_entry = separated_pair(map(identifier, |span| rc_string(span.fragment())), tok(char(':')), expression); delimited(tok(char('{')), separated_list0(tok(char(',')), record_entry), tok(char('}')))(input) } fn lambda_expr(input: Span) -> ParseResult { context( "lambda-expr", preceded( tok(char('\\')), alt(( map(tuple((formal_params, opt(type_anno), block)), |(params, type_anno, body)| { ExpressionKind::Lambda { params, type_anno, body } }), map(tuple((formal_param, opt(type_anno), block)), |(param, type_anno, body)| { ExpressionKind::Lambda { params: vec![param], type_anno, body } }), )), ), )(input) } fn while_expr(input: Span) -> ParseResult { context( "while-expr", map(preceded(kw("while"), pair(opt(expression_no_struct), block)), move |(condition, body)| { ExpressionKind::WhileExpression { condition: condition.map(Box::new), body } }), )(input) } fn if_expr(input: Span) -> ParseResult { fn else_case(input: Span) -> ParseResult> { opt(preceded(kw("else"), expr_or_block))(input) } fn cond_block(input: Span) -> ParseResult { map(block_template(many1(statement_delimiter), cond_arm), IfExpressionBody::CondList)(input) } fn cond_arm(input: Span) -> ParseResult { let condition = map(preceded(kw("is"), pattern), Condition::Pattern); let condition_guard = opt(preceded(kw("if"), expression)); alt(( map(preceded(kw("else"), expr_or_block), |body| ConditionArm { condition: Condition::Else, guard: None, body, }), map( tuple((condition, condition_guard, kw("then"), expr_or_block)), |(condition, guard, _, body)| ConditionArm { condition, guard, body }, ), ))(input) } fn simple_pattern_match(input: Span) -> ParseResult { map( tuple((preceded(kw("is"), pattern), preceded(kw("then"), pair(expr_or_block, else_case)))), |(pattern, (then_case, else_case))| IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case, }, )(input) } fn simple_conditional(input: Span) -> ParseResult { map(preceded(kw("then"), pair(expr_or_block, else_case)), |(then_case, else_case)| { IfExpressionBody::SimpleConditional { then_case, else_case } })(input) } fn if_expr_body(input: Span) -> ParseResult { alt((cond_block, simple_pattern_match, simple_conditional))(input) } context( "if-expr", map(preceded(kw("if"), pair(opt(expression_no_struct), if_expr_body)), |(discriminator, body)| { ExpressionKind::IfExpression { discriminator: discriminator.map(Box::new), body: Box::new(body) } }), )(input) } fn pattern(input: Span) -> ParseResult { alt(( map( delimited(tok(char('(')), separated_list1(tok(char(',')), pattern), tok(char(')'))), Pattern::TuplePattern, ), simple_pattern, ))(input) } fn simple_pattern(input: Span) -> ParseResult { fn record_pattern_entry(input: Span) -> ParseResult<(Rc, Pattern)> { let id = fresh_id(&input); alt(( map(separated_pair(identifier, tok(char(':')), pattern), |(ident, pat)| { (rc_string(ident.fragment()), pat) }), map(identifier, move |ident| { let qn = QualifiedName { id, components: vec![rc_string(ident.fragment())] }; (rc_string(ident.fragment()), Pattern::VarOrName(qn)) }), ))(input) } alt(( pattern_literal, map( pair( qualified_identifier, delimited(tok(char('(')), separated_list0(tok(char(',')), pattern), tok(char(')'))), ), |(qn, members)| Pattern::TupleStruct(qn, members), ), map( pair( qualified_identifier, delimited( tok(char('{')), separated_list0(tok(char(',')), record_pattern_entry), tok(char('}')), ), ), |(qn, members)| Pattern::Record(qn, members), ), map(qualified_identifier, Pattern::VarOrName), ))(input) } fn pattern_literal(input: Span) -> ParseResult { alt(( value(Pattern::Ignored, kw("_")), value(Pattern::Literal(PatternLiteral::BoolPattern(true)), kw("true")), value(Pattern::Literal(PatternLiteral::BoolPattern(false)), kw("false")), map(tok(bare_string_literal), |s| Pattern::Literal(PatternLiteral::StringPattern(Rc::new(s)))), map(pair(opt(tok(char('-'))), alt((float_literal, number_literal))), |(sign, num)| { Pattern::Literal(PatternLiteral::NumPattern { neg: sign.is_some(), num }) }), ))(input) } fn for_expr(input: Span) -> ParseResult { fn for_enumerators(input: Span) -> ParseResult> { alt(( delimited(tok(char('{')), separated_list0(tok(char(',')), enumerator), tok(char('}'))), map(enumerator, |enumerator| vec![enumerator]), ))(input) } //TODO add guards, etc. fn enumerator(input: Span) -> ParseResult { alt(( map(separated_pair(identifier, kw("<-"), expression_no_struct), |(ident, generator)| { Enumerator { identifier: rc_string(ident.fragment()), generator, assignment: false } }), //TODO distinguish these two cases in AST map(separated_pair(identifier, kw("="), expression_no_struct), |(ident, generator)| Enumerator { identifier: rc_string(ident.fragment()), generator, assignment: true, }), ))(input) } fn for_body(input: Span) -> ParseResult> { alt(( preceded(kw("return"), map(expression_no_struct, |expr| Box::new(ForBody::MonadicReturn(expr)))), map(block, |body| Box::new(ForBody::StatementBlock(body))), ))(input) } context( "for-expr", map(preceded(kw("for"), pair(for_enumerators, for_body)), |(enumerators, body)| { ExpressionKind::ForExpression { enumerators, body } }), )(input) } fn paren_expr(input: Span) -> ParseResult { delimited( tok(char('(')), map(separated_list0(tok(char(',')), expression), |mut exprs| match exprs.len() { 1 => exprs.pop().unwrap().kind, _ => ExpressionKind::TupleLiteral(exprs), }), tok(char(')')), )(input) } fn list_expr(input: Span) -> ParseResult { map(delimited(tok(char('[')), separated_list0(tok(char(',')), expression), tok(char(']'))), |items| { ExpressionKind::ListLiteral(items) })(input) } fn string_literal(input: Span) -> ParseResult { context( "string-literal", tok(map(pair(opt(identifier), bare_string_literal), |(prefix, s)| ExpressionKind::StringLiteral { s: Rc::new(s), prefix: prefix.map(|s| rc_string(s.fragment())), })), )(input) } fn bare_string_literal(input: Span) -> ParseResult { let string_escape_transforms = alt(( value('\\', tag("\\")), value('"', tag("\"")), value('\n', tag("n")), value('\t', tag("t")), map(delimited(tag(r#"u{"#), recognize(digit_group_hex), tag("}")), |value| { char::from_u32(u32::from_str_radix(value.fragment(), 16).unwrap()).unwrap() }), )); alt(( map(tag(r#""""#), |_| String::new()), preceded( peek(char('"')), cut(delimited( char('"'), escaped_transform(none_of(r#""\"#), '\\', string_escape_transforms), char('"'), )), ), ))(input) } fn identifier_expr(input: Span) -> ParseResult { context("identifier-expr", map(qualified_identifier, ExpressionKind::Value))(input) } fn qualified_identifier(input: Span) -> ParseResult { let id = fresh_id(&input); tok(map(separated_list1(tag("::"), map(identifier_span, |x| rc_string(x.fragment()))), move |items| { QualifiedName { id, components: items } }))(input) } fn identifier(input: Span) -> ParseResult { context("identifier", tok(identifier_span))(input) } fn identifier_span(input: Span) -> ParseResult { let (rest, parsed) = recognize(tuple(( alt((tag("_"), alpha1)), take_while(|ch: char| is_alphanumeric(ch as u8) || ch == '_'), )))(input.clone())?; if is_keyword(parsed.fragment()) { return Err(nom::Err::Failure(VerboseError::from_error_kind(input, nom::error::ErrorKind::Verify))); } Ok((rest, parsed)) } fn bool_literal(input: Span) -> ParseResult { context( "bool-literal", alt(( map(kw("true"), |_| ExpressionKind::BoolLiteral(true)), map(kw("false"), |_| ExpressionKind::BoolLiteral(false)), )), )(input) } fn float_literal(input: Span) -> ParseResult { context( "float-literal", tok(map( alt(( recognize(tuple((digits(digit_group_dec), char('.'), opt(digits(digit_group_dec))))), recognize(tuple((char('.'), digits(digit_group_dec)))), )), |ds| ExpressionKind::FloatLiteral(ds.fragment().parse().unwrap()), )), )(input) } fn number_literal(input: Span) -> ParseResult { context( "number-literal", map(alt((tok(hex_literal), tok(bin_literal), tok(dec_literal))), ExpressionKind::NatLiteral), )(input) } fn dec_literal(input: Span) -> ParseResult { map(digits(digit_group_dec), |chars: Vec| { let s: String = chars.into_iter().collect(); s.parse().unwrap() })(input) } fn hex_literal(input: Span) -> ParseResult { map(preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)), |chars: Vec| { let s: String = chars.into_iter().collect(); parse_hex(&s).unwrap() })(input) } fn bin_literal(input: Span) -> ParseResult { map(preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)), |chars: Vec| { let s: String = chars.into_iter().collect(); parse_binary(&s).unwrap() })(input) } fn digits<'a, E: ParseError>>( digit_type: impl Parser, Vec, E>, ) -> impl FnMut(Span<'a>) -> IResult, Vec, E> { map(separated_list1(many1(char('_')), digit_type), |items: Vec>| { items.into_iter().flatten().collect() }) } fn digit_group_dec(input: Span) -> ParseResult> { many1(one_of("0123456789"))(input) } fn digit_group_hex(input: Span) -> ParseResult> { many1(one_of("0123456789abcdefABCDEF"))(input) } fn digit_group_bin(input: Span) -> ParseResult> { many1(one_of("01"))(input) } fn parse_binary(digits: &str) -> Result { let mut result: u64 = 0; let mut multiplier = 1; for d in digits.chars().rev() { match d { '1' => result += multiplier, '0' => (), '_' => continue, _ => unreachable!(), } multiplier = match multiplier.checked_mul(2) { Some(m) => m, None => return Err("Binary expression will overflow"), } } Ok(result) } fn parse_hex(digits: &str) -> Result { let mut result: u64 = 0; let mut multiplier: u64 = 1; for d in digits.chars().rev() { if d == '_' { continue; } match d.to_digit(16) { Some(n) => result += n as u64 * multiplier, None => return Err("Internal parser error: invalid hex digit"), } multiplier = match multiplier.checked_mul(16) { Some(m) => m, None => return Err("Hexadecimal expression will overflow"), } } Ok(result) } #[derive(Debug)] struct BinopSequence { first: ExpressionKind, rest: Vec<(BinOp, ExpressionKind)>, } impl BinopSequence { fn do_precedence(self, store: &mut IdStore) -> ExpressionKind { fn helper( precedence: i32, lhs: ExpressionKind, rest: &mut Vec<(BinOp, ExpressionKind)>, store: &mut IdStore, ) -> Expression { let mut lhs = Expression::new(store.fresh(), lhs); while let Some((next_op, next_rhs)) = rest.pop() { let new_precedence = next_op.get_precedence(); if precedence >= new_precedence { rest.push((next_op, next_rhs)); break; } let rhs = helper(new_precedence, next_rhs, rest, store); lhs = Expression::new( store.fresh(), ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)), ); } lhs } let mut as_stack = self.rest.into_iter().rev().collect(); helper(BinOp::min_precedence(), self.first, &mut as_stack, store).kind } } #[cfg(test)] mod test { use std::cell::RefCell; use pretty_assertions::assert_eq; use super::*; macro_rules! span { ($func:expr, $input:expr) => {{ let id_store: IdStore = IdStore::new(); let span = Span::new_extra($input, Rc::new(RefCell::new(id_store))); $func(span).map(|(span, x)| (*span.fragment(), x)) }}; } #[test] fn combinator_test1() { assert_eq!(span!(digits(digit_group_dec), "342"), Ok(("", vec!['3', '4', '2']))); assert_eq!(span!(bin_literal, "0b1111qsdf"), Ok(("qsdf", 15))); assert_eq!(span!(bare_string_literal, r#""fah""#), Ok(("", "fah".to_string()))); assert_eq!(span!(bare_string_literal, r#""""#), Ok(("", "".to_string()))); assert_eq!(*span!(identifier_span, "modulek").unwrap().1.fragment(), "modulek"); assert!(span!(identifier_span, "module").is_err()); } #[test] fn combinator_test_ws0() { assert_eq!(span!(block_comment, "/*yolo*/ "), Ok((" ", ()))); assert_eq!(span!(block_comment, "/*yolo*/ jumpy /*nah*/ "), Ok((" jumpy /*nah*/ ", ()))); assert_eq!(span!(ws0, " /* yolo */"), Ok(("", ()))); assert_eq!(span!(ws0, "/* /* no */ yolo */ "), Ok(("", ()))); } #[test] fn combinator_test2() { for s in [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"].iter() { assert_eq!(span!(expression_kind(true), s).unwrap().1, ExpressionKind::NatLiteral(15)); } assert_eq!( span!(expression_kind(true), " /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true) ); } }