schala/schala-lang/src/parsing/combinator.rs

640 lines
21 KiB
Rust
Raw Normal View History

2021-11-17 12:45:55 -08:00
use std::{cell::RefCell, rc::Rc};
2021-11-16 20:23:27 -08:00
use nom::{
branch::alt,
2021-11-18 17:16:39 -08:00
bytes::complete::{escaped_transform, tag, take_till, take_while},
character::{
complete::{
alpha1, alphanumeric0, char, line_ending, none_of, not_line_ending, one_of, space0, space1,
},
is_alphanumeric,
2021-11-17 12:45:55 -08:00
},
combinator::{map, not, opt, peek, recognize, value},
error::{context, ParseError, VerboseError},
multi::{fold_many1, many0, many1, separated_list0, separated_list1},
2021-11-18 03:32:01 -08:00
sequence::{delimited, pair, preceded, tuple},
2021-11-17 12:45:55 -08:00
Err, IResult, Parser,
2021-11-16 20:23:27 -08:00
};
2021-11-17 03:59:16 -08:00
use nom_locate::{position, LocatedSpan};
2021-11-17 12:45:55 -08:00
2021-11-17 12:45:42 -08:00
use crate::identifier::{Id, IdStore};
2021-11-16 20:23:27 -08:00
2021-11-17 12:45:42 -08:00
/// Shared, interior-mutable handle to the `IdStore` from which ids for AST items are drawn.
type StoreRef = Rc<RefCell<IdStore<ASTItem>>>;
2021-11-17 16:53:03 -08:00
/// Parser input type: a located `&str` slice that carries a `StoreRef` as its extra data.
pub type Span<'a> = LocatedSpan<&'a str, StoreRef>;
2021-11-17 03:59:16 -08:00
/// Result of a parser in this file: a nom `IResult` over `Span` input with `VerboseError` errors.
type ParseResult<'a, O> = IResult<Span<'a>, O, VerboseError<Span<'a>>>;
2021-11-16 20:23:27 -08:00
use crate::ast::*;
2021-11-17 01:54:35 -08:00
/// Copies `s` into a newly allocated, reference-counted `String`.
fn rc_string(s: &str) -> Rc<String> {
    Rc::new(s.to_owned())
}
2021-11-17 12:45:42 -08:00
/// Draws a fresh AST item id from the id store carried in `span`'s extra data.
fn fresh_id(span: &Span) -> Id<ASTItem> {
    span.extra.borrow_mut().fresh()
}
2021-11-16 20:23:27 -08:00
2021-11-17 16:44:09 -08:00
/// Draws a fresh AST item id directly from a shared id-store handle.
fn fresh_id_rc(store_ref: &StoreRef) -> Id<ASTItem> {
    store_ref.borrow_mut().fresh()
}
2021-11-17 12:45:55 -08:00
fn tok<'a, O>(
input_parser: impl Parser<Span<'a>, O, VerboseError<Span<'a>>>,
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O, VerboseError<Span<'a>>> {
context("tok", map(tuple((ws0, input_parser)), |(_, output)| output))
2021-11-17 01:54:35 -08:00
}
2021-11-16 20:23:27 -08:00
2021-11-17 03:59:16 -08:00
fn kw<'a>(keyword_str: &'static str) -> impl FnMut(Span<'a>) -> ParseResult<()> {
2021-11-17 12:45:55 -08:00
context("keyword", tok(value((), tag(keyword_str))))
2021-11-16 20:23:27 -08:00
}
// whitespace does consume at least one piece of whitespace - use ws0 for maybe none
2021-11-17 03:59:16 -08:00
fn whitespace(input: Span) -> ParseResult<()> {
2021-11-17 12:45:55 -08:00
context("whitespace", alt((block_comment, line_comment, value((), space1))))(input)
2021-11-16 20:23:27 -08:00
}
2021-11-17 03:59:16 -08:00
fn ws0(input: Span) -> ParseResult<()> {
2021-11-17 12:45:55 -08:00
context("WS0", value((), many0(whitespace)))(input)
2021-11-16 20:23:27 -08:00
}
2021-11-17 03:59:16 -08:00
fn line_comment(input: Span) -> ParseResult<()> {
2021-11-17 12:45:55 -08:00
value((), tuple((tag("//"), not_line_ending)))(input)
2021-11-16 20:23:27 -08:00
}
2021-11-17 03:59:16 -08:00
fn block_comment(input: Span) -> ParseResult<()> {
2021-11-17 12:45:55 -08:00
context(
"Block-comment",
value(
(),
tuple((
tag("/*"),
many0(alt((value((), none_of("*/")), value((), none_of("/*")), block_comment))),
tag("*/"),
)),
),
)(input)
2021-11-16 20:23:27 -08:00
}
2021-11-17 03:59:16 -08:00
fn statement_delimiter(input: Span) -> ParseResult<()> {
2021-11-17 12:45:55 -08:00
tok(alt((value((), line_ending), value((), char(';')))))(input)
2021-11-16 20:23:27 -08:00
}
2021-11-18 15:17:47 -08:00
pub fn program(input: Span) -> ParseResult<AST> {
let id = fresh_id(&input);
//TODO `rest` should be empty
2021-11-18 17:16:39 -08:00
let (rest, statements) = context(
"AST",
2021-11-18 15:17:47 -08:00
map(
2021-11-18 17:16:39 -08:00
tuple((
many0(statement_delimiter),
separated_list0(statement_delimiter, statement),
many0(statement_delimiter),
)),
|(_, items, _)| items.into(),
),
2021-11-18 15:17:47 -08:00
)(input)?;
2021-11-18 16:45:26 -08:00
println!("REST: {}", rest.fragment());
2021-11-18 15:17:47 -08:00
let ast = AST { id, statements };
Ok((rest, ast))
}
2021-11-18 01:37:05 -08:00
pub fn block(input: Span) -> ParseResult<Block> {
2021-11-17 12:45:55 -08:00
context(
"block",
map(
tuple((
tok(char('{')),
many0(statement_delimiter),
separated_list0(statement_delimiter, statement),
many0(statement_delimiter),
tok(char('}')),
)),
|(_, _, items, _, _)| items.into(),
),
)(input)
2021-11-16 20:23:27 -08:00
}
2021-11-17 03:59:16 -08:00
fn statement(input: Span) -> ParseResult<Statement> {
2021-11-17 12:45:42 -08:00
let (input, pos) = position(input)?;
2021-11-18 15:17:47 -08:00
let location = pos.location_offset().into();
2021-11-17 12:45:42 -08:00
let id = fresh_id(&input);
2021-11-18 15:17:47 -08:00
let (rest, kind) = context(
2021-11-17 12:45:55 -08:00
"Parsing-statement",
2021-11-18 17:16:39 -08:00
alt((map(declaration, StatementKind::Declaration), map(expression, StatementKind::Expression))),
2021-11-18 15:17:47 -08:00
)(input)?;
Ok((rest, Statement { id, location, kind }))
}
fn declaration(input: Span) -> ParseResult<Declaration> {
2021-11-18 18:24:06 -08:00
alt((binding, type_decl, func, module))(input)
}
/// Parses a function: a full declaration with a body if one is present,
/// otherwise a bare signature.
fn func(input: Span) -> ParseResult<Declaration> {
    let signature_only = map(func_signature, Declaration::FuncSig);
    alt((func_decl, signature_only))(input)
}
/// Parses a function signature immediately followed by a block body.
fn func_decl(input: Span) -> ParseResult<Declaration> {
    let (rest, (signature, body)) = pair(func_signature, block)(input)?;
    Ok((rest, Declaration::FuncDecl(signature, body)))
}
//TODO handle operators
/// Parses `fn <name> ( <params> )` with an optional trailing type annotation.
/// The resulting signature always has `operator: false`.
fn func_signature(input: Span) -> ParseResult<Signature> {
    let (rest, (_, name, params, anno)) =
        tuple((kw("fn"), tok(identifier), formal_params, opt(type_anno)))(input)?;
    let signature =
        Signature { name: rc_string(name.fragment()), operator: false, params, type_anno: anno };
    Ok((rest, signature))
}
/// Parses a parenthesized, comma-separated (possibly empty) list of formal parameters.
fn formal_params(input: Span) -> ParseResult<Vec<FormalParam>> {
    let open_paren = tok(char('('));
    let param_list = separated_list0(tok(char(',')), formal_param);
    let close_paren = tok(char(')'));
    delimited(open_paren, param_list, close_paren)(input)
}
//TODO support 256-limit
fn formal_param(input: Span) -> ParseResult<FormalParam> {
map(
tuple((tok(identifier), opt(type_anno), opt(preceded(tok(char('=')), expression)))),
|(name, anno, default)| FormalParam { name: rc_string(name.fragment()), anno, default },
)(input)
2021-11-18 17:16:39 -08:00
}
/// Parses a type declaration. Two forms are tried in order:
/// `type alias <alias> = <name>`, then `type [mut] <name> = <body>`.
fn type_decl(input: Span) -> ParseResult<Declaration> {
    let alias_form = map(
        tuple((kw("type"), kw("alias"), tok(identifier), tok(char('=')), tok(identifier))),
        |(_, _, alias, _, original)| Declaration::TypeAlias {
            alias: rc_string(alias.fragment()),
            original: rc_string(original.fragment()),
        },
    );
    let definition_form = map(
        tuple((kw("type"), opt(kw("mut")), type_singleton_name, tok(char('=')), type_body)),
        |(_, mut_kw, name, _, body)| Declaration::TypeDecl { name, body, mutable: mut_kw.is_some() },
    );
    alt((alias_form, definition_form))(input)
}
fn type_body(input: Span) -> ParseResult<TypeBody> {
let id = fresh_id(&input);
alt((
map(
delimited(tok(char('{')), separated_list1(tok(char(',')), record_variant_item), tok(char('}'))),
2021-11-18 18:24:06 -08:00
move |items| TypeBody::ImmediateRecord { id, fields: items },
2021-11-18 17:16:39 -08:00
),
map(separated_list0(tok(char('|')), variant_spec), TypeBody::Variants),
))(input)
}
/// Parses one variant of a type body: a name followed by a record payload
/// (`{ field: Type, ... }`), a tuple payload (`( Type, ... )`), or nothing (unit variant).
fn variant_spec(input: Span) -> ParseResult<Variant> {
    // `{ name: Type, ... }` payload with at least one field.
    let record_payload = map(
        delimited(tok(char('{')), separated_list1(tok(char(',')), record_variant_item), tok(char('}'))),
        VariantKind::Record,
    );
    // `( Type, ... )` payload with at least one member.
    let tuple_payload = map(
        delimited(tok(char('(')), separated_list1(tok(char(',')), type_identifier), tok(char(')'))),
        VariantKind::TupleStruct,
    );

    let id = fresh_id(&input);
    let (rest, (variant_name, kind)) = alt((
        pair(tok(identifier), record_payload),
        pair(tok(identifier), tuple_payload),
        map(tok(identifier), |ident| (ident, VariantKind::UnitStruct)),
    ))(input)?;
    let variant = Variant { id, name: rc_string(variant_name.fragment()), kind };
    Ok((rest, variant))
}
fn record_variant_item(input: Span) -> ParseResult<(Rc<String>, TypeIdentifier)> {
map(tuple((tok(identifier), tok(char(':')), type_identifier)), |(name, _, ty)| {
(rc_string(name.fragment()), ty)
})(input)
2021-11-18 15:17:47 -08:00
}
fn binding(input: Span) -> ParseResult<Declaration> {
2021-11-18 17:16:39 -08:00
let parser =
tuple((kw("let"), opt(kw("mut")), tok(identifier), opt(type_anno), tok(char('=')), expression));
map(parser, |(_, maybe_mut, ident, type_anno, _, expr)| Declaration::Binding {
name: rc_string(ident.fragment()),
constant: maybe_mut.is_none(),
type_anno,
expr,
})(input)
2021-11-18 15:17:47 -08:00
}
fn module(input: Span) -> ParseResult<Declaration> {
2021-11-18 17:16:39 -08:00
map(tuple((kw("module"), tok(identifier), block)), |(_, name, items)| Declaration::Module {
name: rc_string(name.fragment()),
items,
})(input)
2021-11-16 20:23:27 -08:00
}
2021-11-17 16:53:03 -08:00
pub fn expression(input: Span) -> ParseResult<Expression> {
2021-11-17 12:45:42 -08:00
let id = fresh_id(&input);
2021-11-18 17:16:39 -08:00
map(pair(expression_kind, opt(type_anno)), move |(kind, type_anno)| Expression { id, type_anno, kind })(
input,
)
2021-11-17 03:40:43 -08:00
}
2021-11-17 03:59:16 -08:00
fn type_anno(input: Span) -> ParseResult<TypeIdentifier> {
2021-11-18 16:45:26 -08:00
preceded(tok(char(':')), type_identifier)(input)
2021-11-17 03:40:43 -08:00
}
2021-11-17 03:59:16 -08:00
fn type_identifier(input: Span) -> ParseResult<TypeIdentifier> {
2021-11-17 03:40:43 -08:00
alt((
2021-11-18 17:16:39 -08:00
map(
delimited(tok(char('(')), separated_list0(tok(char(',')), type_identifier), tok(char(')'))),
TypeIdentifier::Tuple,
),
2021-11-18 16:45:26 -08:00
map(type_singleton_name, TypeIdentifier::Singleton),
2021-11-17 03:40:43 -08:00
))(input)
}
2021-11-17 03:59:16 -08:00
fn type_singleton_name(input: Span) -> ParseResult<TypeSingletonName> {
2021-11-18 16:45:26 -08:00
map(pair(tok(identifier), opt(type_params)), |(name, params)| TypeSingletonName {
2021-11-18 17:16:39 -08:00
name: rc_string(name.fragment()),
params: if let Some(params) = params { params } else { vec![] },
2021-11-18 16:45:26 -08:00
})(input)
}
fn type_params(input: Span) -> ParseResult<Vec<TypeIdentifier>> {
2021-11-18 17:16:39 -08:00
delimited(tok(char('<')), separated_list1(tok(char(',')), type_identifier), tok(char('>')))(input)
2021-11-17 03:40:43 -08:00
}
2021-11-17 03:59:16 -08:00
pub fn expression_kind(input: Span) -> ParseResult<ExpressionKind> {
2021-11-18 04:01:40 -08:00
context("expression-kind", precedence_expr)(input)
2021-11-17 16:44:09 -08:00
}
fn precedence_expr(input: Span) -> ParseResult<ExpressionKind> {
2021-11-18 04:01:40 -08:00
let handle = input.extra.clone();
2021-11-17 16:53:03 -08:00
map(
pair(prefix_expr, many0(precedence_continuation)),
2021-11-18 04:01:40 -08:00
move |(first, rest): (ExpressionKind, Vec<(BinOp, ExpressionKind)>)| {
let mut handle_ref = handle.borrow_mut();
BinopSequence { first, rest }.do_precedence(&mut handle_ref)
2021-11-18 17:16:39 -08:00
},
)(input)
2021-11-17 16:44:09 -08:00
}
fn precedence_continuation(input: Span) -> ParseResult<(BinOp, ExpressionKind)> {
pair(operator, prefix_expr)(input)
}
fn operator(input: Span) -> ParseResult<BinOp> {
2021-11-17 16:53:03 -08:00
tok(map(
tuple((not(tag("*/")), recognize(many1(one_of("+-*/%<>=!$&|?^`"))))),
|(_, sigil_span): ((), Span)| BinOp::from_sigil(sigil_span.fragment()),
))(input)
2021-11-17 16:44:09 -08:00
}
fn prefix_op(input: Span) -> ParseResult<PrefixOp> {
2021-11-17 16:53:03 -08:00
tok(map(recognize(one_of("+-!")), |sigil: Span| PrefixOp::from_sigil(sigil.fragment())))(input)
2021-11-17 16:44:09 -08:00
}
fn prefix_expr(input: Span) -> ParseResult<ExpressionKind> {
let handle = input.extra.clone();
2021-11-17 16:53:03 -08:00
context(
"prefix-expr",
map(pair(opt(prefix_op), extended_expr), move |(prefix, expr)| {
if let Some(prefix) = prefix {
let expr = Expression::new(fresh_id_rc(&handle), expr);
ExpressionKind::PrefixExp(prefix, Box::new(expr))
} else {
expr
}
}),
)(input)
2021-11-17 16:44:09 -08:00
}
2021-11-18 14:38:29 -08:00
/// One postfix part that can follow a primary expression, as produced by
/// `extended_expr_part` and folded into an expression by `extended_expr`.
#[derive(Debug)]
enum ExtendedPart<'a> {
    /// `[ expr, ... ]` - the index expressions.
    Index(Vec<Expression>),
    /// `( arg, ... )` - the invocation arguments.
    Call(Vec<InvocationArgument>),
    /// `.name` - the accessed name, borrowed from the input.
    Accessor(&'a str),
}
2021-11-17 16:44:09 -08:00
fn extended_expr(input: Span) -> ParseResult<ExpressionKind> {
2021-11-18 17:16:39 -08:00
let (s, (primary, parts)) =
context("extended-expr", pair(primary_expr, many0(extended_expr_part)))(input)?;
2021-11-18 14:38:29 -08:00
let mut expression = Expression::new(fresh_id(&s), primary);
for part in parts.into_iter() {
let kind = match part {
2021-11-18 17:16:39 -08:00
ExtendedPart::Index(indexers) =>
ExpressionKind::Index { indexee: Box::new(expression), indexers },
ExtendedPart::Call(arguments) => ExpressionKind::Call { f: Box::new(expression), arguments },
2021-11-18 14:38:29 -08:00
ExtendedPart::Accessor(name) => {
let name = rc_string(name);
ExpressionKind::Access { name, expr: Box::new(expression) }
2021-11-18 17:16:39 -08:00
}
2021-11-18 14:38:29 -08:00
};
expression = Expression::new(fresh_id(&s), kind);
}
Ok((s, expression.kind))
2021-11-17 01:54:35 -08:00
}
2021-11-18 14:38:29 -08:00
fn extended_expr_part(input: Span) -> ParseResult<ExtendedPart> {
fn index_part(input: Span) -> ParseResult<Vec<Expression>> {
2021-11-18 17:16:39 -08:00
delimited(tok(char('[')), separated_list1(tok(char(',')), expression), tok(char(']')))(input)
2021-11-18 14:38:29 -08:00
}
fn call_part(input: Span) -> ParseResult<Vec<InvocationArgument>> {
2021-11-18 17:16:39 -08:00
delimited(tok(char('(')), separated_list0(tok(char(',')), invocation_argument), tok(char(')')))(input)
2021-11-18 14:38:29 -08:00
}
fn access_part(input: Span) -> ParseResult<&str> {
2021-11-18 17:16:39 -08:00
preceded(tok(char('.')), map(identifier, |item| *item.fragment()))(input)
2021-11-18 14:38:29 -08:00
}
alt((
map(index_part, ExtendedPart::Index),
map(call_part, ExtendedPart::Call),
2021-11-18 17:16:39 -08:00
map(access_part, ExtendedPart::Accessor),
2021-11-18 14:38:29 -08:00
))(input)
}
2021-11-18 17:16:39 -08:00
//TODO this shouldn't be an expression b/c type annotations disallowed here
2021-11-18 14:38:29 -08:00
fn invocation_argument(input: Span) -> ParseResult<InvocationArgument> {
alt((
map(tok(char('_')), |_| InvocationArgument::Ignored),
2021-11-18 17:16:39 -08:00
map(tuple((tok(identifier), tok(char('=')), expression)), |(name, _, expr)| {
InvocationArgument::Keyword { name: rc_string(name.fragment()), expr }
}),
2021-11-18 14:38:29 -08:00
map(expression, InvocationArgument::Positional),
))(input)
}
2021-11-17 03:59:16 -08:00
fn primary_expr(input: Span) -> ParseResult<ExpressionKind> {
2021-11-18 17:16:39 -08:00
context(
"primary-expr",
alt((
list_expr,
paren_expr,
string_literal,
float_literal,
number_literal,
bool_literal,
identifier_expr,
)),
2021-11-18 03:16:43 -08:00
)(input)
}
2021-11-18 04:01:40 -08:00
fn paren_expr(input: Span) -> ParseResult<ExpressionKind> {
delimited(
tok(char('(')),
2021-11-18 17:16:39 -08:00
map(separated_list0(tok(char(',')), expression), |mut exprs| match exprs.len() {
2021-11-18 04:01:40 -08:00
1 => exprs.pop().unwrap().kind,
_ => ExpressionKind::TupleLiteral(exprs),
}),
2021-11-18 17:16:39 -08:00
tok(char(')')),
2021-11-18 04:01:40 -08:00
)(input)
}
2021-11-18 03:32:01 -08:00
fn list_expr(input: Span) -> ParseResult<ExpressionKind> {
2021-11-18 17:16:39 -08:00
map(delimited(tok(char('[')), separated_list0(tok(char(',')), expression), tok(char(']'))), |items| {
ExpressionKind::ListLiteral(items)
})(input)
2021-11-18 03:32:01 -08:00
}
2021-11-18 03:16:43 -08:00
//TODO need to do something with prefix in the AST
fn string_literal(input: Span) -> ParseResult<ExpressionKind> {
2021-11-18 17:16:39 -08:00
tok(map(pair(opt(identifier), bare_string_literal), |(_maybe_prefix, s)| {
ExpressionKind::StringLiteral(Rc::new(s))
}))(input)
2021-11-17 01:54:35 -08:00
}
2021-11-18 03:16:43 -08:00
fn bare_string_literal(input: Span) -> ParseResult<String> {
2021-11-18 17:16:39 -08:00
let string_escape_transforms =
alt((value("\\", tag("\\")), value("\"", tag("\"")), value("\n", tag("n")), value("\t", tag("t"))));
alt((
map(tag(r#""""#), |_| String::new()),
map(
tuple((
char('"'),
escaped_transform(none_of(r#""\"#), '\\', string_escape_transforms),
char('"'),
)),
|(_, s, _)| s,
),
))(input)
2021-11-18 03:16:43 -08:00
}
2021-11-17 03:59:16 -08:00
fn identifier_expr(input: Span) -> ParseResult<ExpressionKind> {
2021-11-17 01:54:35 -08:00
context("identifier-expr", map(qualified_identifier, ExpressionKind::Value))(input)
}
2021-11-17 03:59:16 -08:00
fn qualified_identifier(input: Span) -> ParseResult<QualifiedName> {
2021-11-17 12:45:42 -08:00
let id = fresh_id(&input);
2021-11-17 12:45:55 -08:00
tok(map(separated_list1(tag("::"), map(identifier, |x| rc_string(x.fragment()))), move |items| {
QualifiedName { id, components: items }
}))(input)
2021-11-17 01:54:35 -08:00
}
2021-11-17 03:59:16 -08:00
fn identifier(input: Span) -> ParseResult<Span> {
2021-11-18 17:16:39 -08:00
recognize(pair(alt((tag("_"), alpha1)), take_while(|ch: char| is_alphanumeric(ch as u8) || ch == '_')))(
input,
)
2021-11-16 20:23:27 -08:00
}
2021-11-17 03:59:16 -08:00
fn bool_literal(input: Span) -> ParseResult<ExpressionKind> {
2021-11-17 12:45:55 -08:00
context(
"bool-literal",
alt((
map(kw("true"), |_| ExpressionKind::BoolLiteral(true)),
map(kw("false"), |_| ExpressionKind::BoolLiteral(false)),
)),
)(input)
2021-11-16 20:23:27 -08:00
}
2021-11-17 16:53:03 -08:00
/// Parses a float literal: `digits . [digits]` or `. digits`, where digit groups
/// may be separated by underscores (e.g. `1_000.5`).
fn float_literal(input: Span) -> ParseResult<ExpressionKind> {
    tok(map(
        alt((
            recognize(tuple((digits(digit_group_dec), char('.'), opt(digits(digit_group_dec))))),
            recognize(tuple((char('.'), digits(digit_group_dec)))),
        )),
        |ds: Span| {
            // `recognize` returns the raw text, underscores included, which the
            // standard float parser rejects - strip them first.
            let text: String = ds.fragment().chars().filter(|&c| c != '_').collect();
            ExpressionKind::FloatLiteral(
                text.parse().expect("digits with an optional fraction always parse as a float"),
            )
        },
    ))(input)
}
2021-11-17 03:59:16 -08:00
fn number_literal(input: Span) -> ParseResult<ExpressionKind> {
2021-11-16 20:23:27 -08:00
map(alt((tok(hex_literal), tok(bin_literal), tok(dec_literal))), ExpressionKind::NatLiteral)(input)
}
2021-11-17 03:59:16 -08:00
fn dec_literal(input: Span) -> ParseResult<u64> {
2021-11-16 20:23:27 -08:00
map(digits(digit_group_dec), |chars: Vec<char>| {
let s: String = chars.into_iter().collect();
s.parse().unwrap()
})(input)
}
2021-11-17 03:59:16 -08:00
fn hex_literal(input: Span) -> ParseResult<u64> {
2021-11-16 20:23:27 -08:00
map(preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)), |chars: Vec<char>| {
let s: String = chars.into_iter().collect();
parse_hex(&s).unwrap()
})(input)
}
2021-11-17 03:59:16 -08:00
fn bin_literal(input: Span) -> ParseResult<u64> {
2021-11-16 20:23:27 -08:00
map(preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)), |chars: Vec<char>| {
let s: String = chars.into_iter().collect();
parse_binary(&s).unwrap()
})(input)
}
2021-11-17 03:59:16 -08:00
fn digits<'a, E: ParseError<Span<'a>>>(
digit_type: impl Parser<Span<'a>, Vec<char>, E>,
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Vec<char>, E> {
2021-11-16 20:23:27 -08:00
map(separated_list1(many1(char('_')), digit_type), |items: Vec<Vec<char>>| {
items.into_iter().flatten().collect()
})
}
2021-11-17 03:59:16 -08:00
fn digit_group_dec(input: Span) -> ParseResult<Vec<char>> {
2021-11-16 20:23:27 -08:00
many1(one_of("0123456789"))(input)
}
2021-11-17 03:59:16 -08:00
fn digit_group_hex(input: Span) -> ParseResult<Vec<char>> {
2021-11-16 20:23:27 -08:00
many1(one_of("0123456789abcdefABCDEF"))(input)
}
2021-11-17 03:59:16 -08:00
fn digit_group_bin(input: Span) -> ParseResult<Vec<char>> {
2021-11-16 20:23:27 -08:00
many1(one_of("01"))(input)
}
/// Converts a string of binary digits (most significant first) into a `u64`.
///
/// Underscores are ignored and an empty string yields `0`.
///
/// # Errors
/// Returns an error if the value does not fit in a `u64`, or if `digits`
/// contains a character other than `0`, `1` or `_`.
fn parse_binary(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    for d in digits.chars() {
        let bit = match d {
            '0' => 0,
            '1' => 1,
            '_' => continue,
            _ => return Err("Internal parser error: invalid binary digit"),
        };
        // Shift-and-add from the most significant digit: overflow is reported only
        // when the value itself exceeds 64 bits, so full-width literals are accepted.
        result = result
            .checked_mul(2)
            .and_then(|shifted| shifted.checked_add(bit))
            .ok_or("Binary expression will overflow")?;
    }
    Ok(result)
}
/// Converts a string of hexadecimal digits (most significant first) into a `u64`.
///
/// Underscores are ignored and an empty string yields `0`.
///
/// # Errors
/// Returns an error if the value does not fit in a `u64`, or if `digits`
/// contains a character that is neither a hex digit nor `_`.
fn parse_hex(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    for d in digits.chars() {
        if d == '_' {
            continue;
        }
        let n = d.to_digit(16).ok_or("Internal parser error: invalid hex digit")?;
        // Shift-and-add from the most significant digit: overflow is reported only
        // when the value itself exceeds 64 bits, so 16-digit literals are accepted.
        result = result
            .checked_mul(16)
            .and_then(|shifted| shifted.checked_add(u64::from(n)))
            .ok_or("Hexadecimal expression will overflow")?;
    }
    Ok(result)
}
2021-11-18 04:01:40 -08:00
/// A flat binary-operator chain as parsed by `precedence_expr`: a first operand
/// followed by `(operator, operand)` pairs in source order.
#[derive(Debug)]
struct BinopSequence {
    /// The leftmost operand.
    first: ExpressionKind,
    /// Each following operator together with the operand to its right.
    rest: Vec<(BinOp, ExpressionKind)>,
}
impl BinopSequence {
fn do_precedence(self, store: &mut IdStore<ASTItem>) -> ExpressionKind {
fn helper(
precedence: i32,
lhs: ExpressionKind,
rest: &mut Vec<(BinOp, ExpressionKind)>,
store: &mut IdStore<ASTItem>,
) -> Expression {
let mut lhs = Expression::new(store.fresh(), lhs);
while let Some((next_op, next_rhs)) = rest.pop() {
let new_precedence = next_op.get_precedence();
if precedence >= new_precedence {
rest.push((next_op, next_rhs));
break;
}
let rhs = helper(new_precedence, next_rhs, rest, store);
lhs = Expression::new(
store.fresh(),
ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)),
);
}
lhs
}
let mut as_stack = self.rest.into_iter().rev().collect();
helper(BinOp::min_precedence(), self.first, &mut as_stack, store).kind
}
}
2021-11-16 20:23:27 -08:00
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Shorthand for an owned, reference-counted copy of `s`.
    fn rc(s: &str) -> Rc<String> {
        Rc::new(s.to_owned())
    }

    /// Builds a `QualifiedName` from bare identifier tokens, with a default id.
    macro_rules! qn {
        ( $( $component:ident),* ) => {
            {
                let mut components = vec![];
                $(
                    components.push(rc(stringify!($component)));
                )*
                QualifiedName { components, id: Default::default() }
            }
        };
    }

    /// Runs parser `$func` on `$input` with a new id store and maps the result to
    /// `(remaining text, output)` so it can be compared directly.
    macro_rules! span {
        ($func:expr, $input:expr) => {{
            let id_store: IdStore<ASTItem> = IdStore::new();
            let span = Span::new_extra($input, Rc::new(RefCell::new(id_store)));
            $func(span).map(|(span, x)| (*span.fragment(), x))
        }};
    }

    #[test]
    fn combinator_test1() {
        assert_eq!(span!(digits(digit_group_dec), "342"), Ok(("", vec!['3', '4', '2'])));
        assert_eq!(span!(bin_literal, "0b1111qsdf"), Ok(("qsdf", 15)));
        assert_eq!(span!(bare_string_literal, r#""fah""#), Ok(("", "fah".to_string())));
        assert_eq!(span!(bare_string_literal, r#""""#), Ok(("", "".to_string())));
    }

    #[test]
    fn combinator_test_ws0() {
        assert_eq!(span!(block_comment, "/*yolo*/"), Ok(("", ())));
        assert_eq!(span!(block_comment, "/*yolo*/ jumpy /*nah*/"), Ok((" jumpy /*nah*/", ())));
        assert_eq!(span!(ws0, "/* yolo */ "), Ok(("", ())));
        assert_eq!(span!(ws0, "/* /* no */ yolo */ "), Ok(("", ())));
    }

    #[test]
    fn combinator_test2() {
        for s in [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"].iter() {
            assert_eq!(span!(expression_kind, s).unwrap().1, ExpressionKind::NatLiteral(15));
        }

        assert_eq!(span!(expression_kind, " /*gay*/ true").unwrap().1, ExpressionKind::BoolLiteral(true));
        assert_eq!(
            span!(expression_kind, " /*yolo*/ barnaby").unwrap().1,
            ExpressionKind::Value(qn!(barnaby))
        );
    }
}