schala/schala-lang/src/parsing/combinator.rs

337 lines
9.7 KiB
Rust

use nom::{
    branch::alt,
    bytes::complete::{tag, take_till},
    character::complete::{
        alpha1, alphanumeric0, char, line_ending, multispace0, none_of, not_line_ending, one_of,
        space0, space1,
    },
    combinator::{map, map_res, not, opt, peek, recognize, value},
    error::{context, ParseError, VerboseError},
    multi::{fold_many1, many0, many1, separated_list0, separated_list1},
    sequence::{pair, preceded, tuple},
    Err, IResult, Parser,
};
use nom_locate::{position, LocatedSpan};
use std::rc::Rc;
type Span<'a> = LocatedSpan<&'a str>;
type ParseResult<'a, O> = IResult<Span<'a>, O, VerboseError<Span<'a>>>;
use crate::ast::*;
/// Copies `s` into a freshly allocated, reference-counted `String`.
fn rc_string(s: &str) -> Rc<String> {
    let owned = String::from(s);
    Rc::new(owned)
}
fn tok<'a, O>(input_parser: impl Parser<Span<'a>, O, VerboseError<Span<'a>>>) -> impl FnMut(Span<'a>)
-> IResult<Span<'a>, O, VerboseError<Span<'a>>> {
context("tok",
map(tuple((ws0, input_parser)), |(_, output)|
output))
}
fn kw<'a>(keyword_str: &'static str) -> impl FnMut(Span<'a>) -> ParseResult<()> {
context("keyword",
tok(value((), tag(keyword_str))))
}
// whitespace does consume at least one piece of whitespace - use ws0 for maybe none
fn whitespace(input: Span) -> ParseResult<()> {
context("whitespace",
alt((
block_comment,
line_comment,
value((), space1),
)))(input)
}
fn ws0(input: Span) -> ParseResult<()> {
context("WS0",
value((), many0(whitespace)))(input)
}
fn line_comment(input: Span) -> ParseResult<()> {
value((),
tuple((tag("//"), not_line_ending)),
)(input)
}
fn block_comment(input: Span) -> ParseResult<()> {
context("Block-comment",
value((),
tuple((
tag("/*"),
many0(alt((
value((), none_of("*/")),
value((), none_of("/*")),
block_comment,
))),
tag("*/")
))))(input)
}
fn statement_delimiter(input: Span) -> ParseResult<()> {
tok(alt((
value((), line_ending),
value((), char(';'))
))
)(input)
}
fn block(input: Span) -> ParseResult<Block> {
context("block",
map(
tuple((
tok(char('{')),
many0(statement_delimiter),
separated_list0(statement_delimiter, statement),
many0(statement_delimiter),
tok(char('}')),
)), |(_, _, items, _, _)| items.into()))(input)
}
/// Parses a statement. Currently the only statement form is a bare expression,
/// which is wrapped in `StatementKind::Expression` with default `id` and
/// `location`.
fn statement(input: Span) -> ParseResult<Statement> {
    let (rest, expr) = context("Parsing-statement", expression)(input)?;
    let parsed = Statement {
        id: Default::default(),
        location: Default::default(),
        kind: StatementKind::Expression(expr),
    };
    Ok((rest, parsed))
}
/// Parses an expression: an expression kind optionally followed by a type
/// annotation.
fn expression(input: Span) -> ParseResult<Expression> {
    // NOTE(review): the optional annotation is parsed but then dropped; confirm
    // whether it should be attached to the resulting `Expression`.
    let (rest, (kind, _maybe_anno)) = pair(expression_kind, opt(type_anno))(input)?;
    Ok((rest, Expression::new(Default::default(), kind)))
}
fn type_anno(input: Span) -> ParseResult<TypeIdentifier> {
preceded(kw(":"), type_identifier)(input)
}
/// Parses a type identifier (what follows the `:` in a type annotation).
///
/// Not implemented yet: calling this always panics via `unimplemented!()`.
fn type_identifier(input: Span) -> ParseResult<TypeIdentifier> {
/*
Sketch of the intended grammar, kept for reference (not compiled):

alt((
tuple((kw("("), separated_list0(kw(","), type_identifier), kw(")"))),
type_singleton_name
))(input)
*/
unimplemented!()
}
/// Parses a `TypeSingletonName`.
///
/// Not implemented yet: calling this always panics via `unimplemented!()`.
fn type_singleton_name(input: Span) -> ParseResult<TypeSingletonName> {
unimplemented!()
}
/// Parses the kind of an expression. At present this is just a primary
/// expression, labelled with the `"expression-kind"` error context.
pub fn expression_kind(input: Span) -> ParseResult<ExpressionKind> {
    let mut labelled = context("expression-kind", primary_expr);
    labelled(input)
}
/// Parses a primary expression, trying in order: a number literal, a boolean
/// literal, then an identifier expression.
fn primary_expr(input: Span) -> ParseResult<ExpressionKind> {
    let candidates = alt((number_literal, bool_literal, identifier_expr));
    context("primary-expr", candidates)(input)
}
/// Parses a qualified identifier and wraps it in `ExpressionKind::Value`.
fn identifier_expr(input: Span) -> ParseResult<ExpressionKind> {
    let (rest, name) = context("identifier-expr", qualified_identifier)(input)?;
    Ok((rest, ExpressionKind::Value(name)))
}
/// Parses one or more identifiers separated by `::` (after skipping leading
/// whitespace/comments) into a `QualifiedName` with a default `id`.
fn qualified_identifier(input: Span) -> ParseResult<QualifiedName> {
    let (rest, components) = tok(separated_list1(
        tag("::"),
        map(identifier, |part| rc_string(part.fragment())),
    ))(input)?;
    Ok((rest, QualifiedName { id: Default::default(), components }))
}
fn identifier(input: Span) -> ParseResult<Span> {
recognize(
tuple((
alt((tag("_"), alpha1)),
alphanumeric0,
)))(input)
}
fn bool_literal(input: Span) -> ParseResult<ExpressionKind> {
context("bool-literal",
alt((
map(kw("true"), |_| ExpressionKind::BoolLiteral(true)),
map(kw("false"), |_| ExpressionKind::BoolLiteral(false)),
)))(input)
}
/// Parses a natural-number literal, trying hexadecimal, then binary, then
/// decimal, and wraps the value in `ExpressionKind::NatLiteral`.
fn number_literal(input: Span) -> ParseResult<ExpressionKind> {
    let (rest, n) = alt((tok(hex_literal), tok(bin_literal), tok(dec_literal)))(input)?;
    Ok((rest, ExpressionKind::NatLiteral(n)))
}
fn dec_literal(input: Span) -> ParseResult<u64> {
map(digits(digit_group_dec), |chars: Vec<char>| {
let s: String = chars.into_iter().collect();
s.parse().unwrap()
})(input)
}
fn hex_literal(input: Span) -> ParseResult<u64> {
map(preceded(alt((tag("0x"), tag("0X"))), digits(digit_group_hex)), |chars: Vec<char>| {
let s: String = chars.into_iter().collect();
parse_hex(&s).unwrap()
})(input)
}
fn bin_literal(input: Span) -> ParseResult<u64> {
map(preceded(alt((tag("0b"), tag("0B"))), digits(digit_group_bin)), |chars: Vec<char>| {
let s: String = chars.into_iter().collect();
parse_binary(&s).unwrap()
})(input)
}
/// Builds a parser for one or more digit groups (each parsed by `digit_type`)
/// separated by runs of one or more `_`, returning all digits concatenated in
/// order with the underscores dropped.
fn digits<'a, E: ParseError<Span<'a>>>(
    digit_type: impl Parser<Span<'a>, Vec<char>, E>,
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Vec<char>, E> {
    let underscores = many1(char('_'));
    map(separated_list1(underscores, digit_type), |groups: Vec<Vec<char>>| groups.concat())
}
/// Parses one or more consecutive decimal digits (`0`-`9`).
fn digit_group_dec(input: Span) -> ParseResult<Vec<char>> {
    let decimal_digit = one_of("0123456789");
    many1(decimal_digit)(input)
}
/// Parses one or more consecutive hexadecimal digits (`0`-`9`, `a`-`f`, `A`-`F`).
fn digit_group_hex(input: Span) -> ParseResult<Vec<char>> {
    let hex_digit = one_of("0123456789abcdefABCDEF");
    many1(hex_digit)(input)
}
/// Parses one or more consecutive binary digits (`0` or `1`).
fn digit_group_bin(input: Span) -> ParseResult<Vec<char>> {
    let binary_digit = one_of("01");
    many1(binary_digit)(input)
}
/// Converts a string of binary digits into a `u64`.
///
/// Underscores are ignored. An empty string yields `Ok(0)`.
///
/// # Errors
///
/// * `"Binary expression will overflow"` if the value needs more than 64 bits.
///   Leading zeros never count towards the width, and all 64-bit values
///   (up to `u64::MAX`) are accepted.
/// * `"Internal parser error: invalid binary digit"` if a character other than
///   `0`, `1` or `_` is present.
fn parse_binary(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    // Accumulate most-significant digit first so overflow is detected exactly
    // when a significant bit would be shifted out of the 64-bit range.
    for d in digits.chars() {
        let bit = match d {
            '0' => 0,
            '1' => 1,
            '_' => continue,
            _ => return Err("Internal parser error: invalid binary digit"),
        };
        result = result
            .checked_mul(2)
            .and_then(|shifted| shifted.checked_add(bit))
            .ok_or("Binary expression will overflow")?;
    }
    Ok(result)
}
/// Converts a string of hexadecimal digits (either case) into a `u64`.
///
/// Underscores are ignored. An empty string yields `Ok(0)`.
///
/// # Errors
///
/// * `"Hexadecimal expression will overflow"` if the value does not fit in
///   64 bits. Leading zeros never count towards the width, and all 16-digit
///   values (up to `u64::MAX`) are accepted.
/// * `"Internal parser error: invalid hex digit"` if a character that is
///   neither a hex digit nor `_` is present.
fn parse_hex(digits: &str) -> Result<u64, &'static str> {
    let mut result: u64 = 0;
    // Accumulate most-significant digit first so overflow is detected exactly
    // when a significant nibble would be shifted out of the 64-bit range.
    for d in digits.chars() {
        if d == '_' {
            continue;
        }
        let nibble = d
            .to_digit(16)
            .ok_or("Internal parser error: invalid hex digit")?;
        result = result
            .checked_mul(16)
            .and_then(|shifted| shifted.checked_add(u64::from(nibble)))
            .ok_or("Hexadecimal expression will overflow")?;
    }
    Ok(result)
}
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Copies `s` into an `Rc<String>`, the representation used for name components.
    fn rc(s: &str) -> Rc<String> {
        Rc::new(String::from(s))
    }

    /// Builds a `QualifiedName` with a default id from a comma-separated list
    /// of bare identifiers.
    macro_rules! qn {
        ( $( $component:ident),* ) => {
            QualifiedName {
                id: Default::default(),
                components: vec![ $( rc(stringify!($component)) ),* ],
            }
        };
    }

    /// Runs parser `$func` on `$input` and, on success, replaces the remaining
    /// span with its plain `&str` fragment so results are easy to compare.
    macro_rules! span {
        ($func:expr, $input:expr) => {
            $func(Span::new($input)).map(|(rest, output)| (*rest.fragment(), output))
        };
    }

    #[test]
    fn combinator_test1() {
        let grouped = span!(digits(digit_group_dec), "342");
        assert_eq!(grouped, Ok(("", vec!['3', '4', '2'])));

        let binary = span!(bin_literal, "0b1111qsdf");
        assert_eq!(binary, Ok(("qsdf", 15)));
    }

    #[test]
    fn combinator_test_ws0() {
        // (input, expected unconsumed remainder) for a single block comment.
        let comment_cases = [
            ("/*yolo*/", ""),
            ("/*yolo*/ jumpy /*nah*/", " jumpy /*nah*/"),
        ];
        for &(text, leftover) in comment_cases.iter() {
            assert_eq!(span!(block_comment, text), Ok((leftover, ())));
        }

        // `ws0` must swallow comments (including nested ones) and trailing blanks.
        for &text in ["/* yolo */ ", "/* /* no */ yolo */ "].iter() {
            assert_eq!(span!(ws0, text), Ok(("", ())));
        }
    }

    #[test]
    fn combinator_test2() {
        let spellings_of_fifteen = [" 15", " 0b1111", " 1_5_", "0XF__", "0Xf"];
        for &s in spellings_of_fifteen.iter() {
            let (_, parsed) = span!(expression_kind, s).unwrap();
            assert_eq!(parsed, ExpressionKind::NatLiteral(15));
        }

        let (_, boolean) = span!(expression_kind, " /*gay*/ true").unwrap();
        assert_eq!(boolean, ExpressionKind::BoolLiteral(true));

        let (_, name) = span!(expression_kind, " /*yolo*/ barnaby").unwrap();
        assert_eq!(name, ExpressionKind::Value(qn!(barnaby)));
    }

    #[test]
    fn combinator_test3() {
        let source = "{}";
        assert_eq!(span!(block, source).unwrap().1, vec![].into());

        let source = r#"{
//hella
4_5 //bog
11; /*chutney*/0xf
}"#;

        let (_, parsed_block) = match span!(block, source) {
            Ok(success) => success,
            Err(failure) => {
                let verbose = match failure {
                    Err::Error(e) | Err::Failure(e) => e,
                    _ => panic!(),
                };
                // `convert_error` needs plain `&str` positions, not located spans.
                let readable = VerboseError {
                    errors: verbose
                        .errors
                        .into_iter()
                        .map(|(sp, kind)| (*sp.fragment(), kind))
                        .collect(),
                };
                println!("{}", nom::error::convert_error(source, readable));
                panic!("parse error desu!");
            }
        };

        // Every statement in the fixture is a bare natural-number literal.
        let nat_statement = |n| Statement {
            id: Default::default(),
            location: Default::default(),
            kind: StatementKind::Expression(Expression::new(
                Default::default(),
                ExpressionKind::NatLiteral(n),
            )),
        };
        assert_eq!(
            parsed_block,
            vec![nat_statement(45), nat_statement(11), nat_statement(15)].into()
        );
    }
}