schala/schala-lang/language/src/parser.rs

239 lines
7.6 KiB
Rust

extern crate nom;
use std::rc::Rc;
use std::str::FromStr;
use nom::IResult;
use nom::character::complete::{one_of, space0, alphanumeric0};
use nom::bytes::complete::{tag, take, take_while, take_until};
use nom::combinator::{map, map_res, value, opt, verify};
use nom::multi::{separated_list, separated_nonempty_list, many1, many0};
use nom::error::{context, VerboseError};
use nom::branch::alt;
use nom::sequence::{pair, delimited};
use crate::ast::*;
use crate::builtin::Builtin;
type ParseResult<'a, T> = IResult<&'a str, T, VerboseError<&'a str>>;
/// Consumes exactly one character from the front of `text`, succeeding only
/// when that character satisfies `char::is_alphabetic`.
fn single_alphabetic_character(text: &str) -> ParseResult<char> {
    let one_alphabetic = verify(take(1usize), |chunk: &str| {
        chunk.chars().next().map_or(false, |c| c.is_alphabetic())
    });
    // `verify` only lets through a slice whose first character exists, so the
    // `unwrap` below cannot fail.
    map(one_alphabetic, |chunk: &str| chunk.chars().next().unwrap())(text)
}
/// Consumes exactly one character from the front of `text`, succeeding only
/// when that character is alphanumeric (`char::is_alphanumeric`) or an
/// underscore.
fn single_alphanumeric_character(text: &str) -> ParseResult<char> {
    let one_word_char = verify(take(1usize), |chunk: &str| {
        chunk
            .chars()
            .next()
            .map_or(false, |c| c == '_' || c.is_alphanumeric())
    });
    // `verify` only lets through a slice whose first character exists, so the
    // `unwrap` below cannot fail.
    map(one_word_char, |chunk: &str| chunk.chars().next().unwrap())(text)
}
/// Parses an identifier and returns it as a reference-counted `String`.
///
/// Two shapes are accepted, tried in this order:
/// * an underscore followed by at least one alphanumeric-or-underscore
///   character (so a lone `_` is not an identifier), or
/// * an alphabetic character followed by zero or more
///   alphanumeric-or-underscore characters.
fn identifier(text: &str) -> ParseResult<Rc<String>> {
    use nom::character::complete::char;

    let underscore_led = pair(char('_'), many1(single_alphanumeric_character));
    let letter_led = pair(single_alphabetic_character, many0(single_alphanumeric_character));

    context("Identifier", map(
        alt((underscore_led, letter_led)),
        |(head, tail): (char, Vec<char>)| {
            // Reassemble the leading character and the rest into one string.
            let mut name = String::with_capacity(tail.len() + 1);
            name.push(head);
            name.extend(tail);
            Rc::new(name)
        },
    ))(text)
}
/// Every character that may appear in a binary-operator sigil; `parse_binop`
/// accepts any non-empty run of these.
const OPERATOR_CHARS: &str = "~`!@#$%^&*-+=<>?/|";
/// Parses a binary operator: one or more consecutive characters drawn from
/// `OPERATOR_CHARS`, joined into a sigil and converted with
/// `BinOp::from_sigil`.
fn parse_binop(text: &str) -> ParseResult<BinOp> {
    let sigil_chars = many1(one_of(OPERATOR_CHARS));
    context("Binop", map(sigil_chars, |chars: Vec<char>| {
        let sigil: String = chars.into_iter().collect();
        BinOp::from_sigil(&sigil)
    }))(text)
}
fn parse_bool_literal(text: &str) -> ParseResult<ExpressionKind> {
let p = alt((
value(true, tag("true")),
value(false, tag("false"))
));
context("Bool literal", map(p, ExpressionKind::BoolLiteral))(text)
}
fn parse_number_literal(text: &str) -> ParseResult<ExpressionKind> {
let num_lit = many1(alt((
map(one_of("1234567890"), |s: char| Some(s)),
map(nom::character::complete::char('_'), |_| None)
)));
let (text, n) = context("Number literal", map_res(num_lit,
|digits: Vec<Option<char>>| {
let num_str: String = digits.into_iter().filter_map(|x| x).collect();
u64::from_str_radix(&num_str, 10)
}))(text)?;
Ok((text, ExpressionKind::NatLiteral(n)))
}
/// Parses a binary literal: the prefix `0b` followed by a run of `0`/`1`
/// characters, yielding `ExpressionKind::NatLiteral`.
///
/// An empty digit run, or a value that does not fit in a `u64`, makes
/// `u64::from_str_radix` fail and therefore the parse fail.
fn parse_binary_literal(input: &str) -> ParseResult<ExpressionKind> {
    let (after_prefix, _) = tag("0b")(input)?;
    let (remaining, value): (&str, u64) = map_res(
        take_while(|c: char| c == '0' || c == '1'),
        |bits: &str| u64::from_str_radix(bits, 2)
    )(after_prefix)?;
    Ok((remaining, ExpressionKind::NatLiteral(value)))
}
/// Parses a hexadecimal literal: the prefix `0x` followed by a run of hex
/// digits (either case), yielding `ExpressionKind::NatLiteral`.
///
/// An empty digit run, or a value that does not fit in a `u64`, makes
/// `u64::from_str_radix` fail and therefore the parse fail.
fn parse_hex_literal(input: &str) -> ParseResult<ExpressionKind> {
    let (after_prefix, _) = tag("0x")(input)?;
    let (remaining, value): (&str, u64) = map_res(
        take_while(|c: char| c.is_ascii_hexdigit()),
        |digits: &str| u64::from_str_radix(digits, 16)
    )(after_prefix)?;
    Ok((remaining, ExpressionKind::NatLiteral(value)))
}
/// Parses a double-quoted string literal into `ExpressionKind::StringLiteral`.
///
/// The contents are everything up to the first `"` after the opening quote;
/// no escape sequences are interpreted.
fn parse_string_literal(text: &str) -> ParseResult<ExpressionKind> {
    use nom::character::complete::char;

    let quoted = delimited(char('"'), take_until("\""), char('"'));
    context("String literal", map(quoted, |contents: &str| {
        let owned = Rc::new(String::from(contents));
        ExpressionKind::StringLiteral(owned)
    }))(text)
}
/// Parses any literal, trying the alternatives in order: hex, binary,
/// decimal number, bool, string.
///
/// The prefixed forms (`0x…`, `0b…`) come before the plain decimal parser so
/// that their leading `0` is not consumed as a number on its own.
fn literal(input: &str) -> ParseResult<ExpressionKind> {
    context(
        "Literal",
        alt((
            parse_hex_literal,
            parse_binary_literal,
            parse_number_literal,
            parse_bool_literal,
            parse_string_literal,
        )),
    )(input)
}
/// Parses an expression wrapped in `(` … `)` and returns the inner expression
/// kind; the parentheses themselves are discarded.
fn paren_expr(text: &str) -> ParseResult<ExpressionKind> {
    use nom::character::complete::char;

    let parenthesized = delimited(char('('), expression_kind, char(')'));
    context("Paren expression", parenthesized)(text)
}
/// Parses a single prefix-operator character (`+`, `-` or `!`) and converts it
/// with `PrefixOp::from_str`.
///
/// The conversion result is unwrapped.
/// NOTE(review): this presumes `PrefixOp::from_str` accepts all three sigils —
/// confirm against its definition.
fn prefix_op(input: &str) -> ParseResult<PrefixOp> {
    use nom::character::complete::char;

    let sigil_char = alt((char('+'), char('-'), char('!')));
    map(sigil_char, |c: char| {
        let sigil = c.to_string();
        PrefixOp::from_str(&sigil).unwrap()
    })(input)
}
fn identifier_expr(text: &str) -> ParseResult<ExpressionKind> {
let (text, qualified_identifier) = map(
qualified_identifier_list,
|components| QualifiedName { id: ItemId::new(0), components }
)(text)?;
//TODO handle struct literals
let exp = Expression::new(ItemId::new(0), ExpressionKind::Value(qualified_identifier));
Ok((text, exp.kind))
}
/// Parses one or more identifiers separated by `::` and returns them in
/// source order.
fn qualified_identifier_list(text: &str) -> ParseResult<Vec<Rc<String>>> {
    let path_components = separated_nonempty_list(tag("::"), identifier);
    context("Qualified identifier list", path_components)(text)
}
/// Parses a primary expression.
///
/// Intended grammar:
/// primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
///
/// Only `literal`, `paren_expr` and `identifier_expr` are tried here, in that
/// order.
fn primary_expr(text: &str) -> ParseResult<ExpressionKind> {
    alt((
        literal,
        paren_expr,
        identifier_expr,
    ))(text)
}
/// Parses one argument of a call.
///
/// An underscore followed by zero or more ASCII alphanumerics is an ignored
/// argument (`InvocationArgument::Ignored`). Anything else is parsed as an
/// expression and wrapped as `InvocationArgument::Positional`, with a
/// placeholder id and no type annotation.
fn invocation_argument(text: &str) -> ParseResult<InvocationArgument> {
    use nom::character::complete::char;

    let ignored = value(InvocationArgument::Ignored, pair(char('_'), alphanumeric0));
    let positional = map(expression_kind, |kind: ExpressionKind| {
        let expr = Expression { id: ItemId::new(0), kind, type_anno: None };
        InvocationArgument::Positional(expr)
    });
    alt((ignored, positional))(text)
}
/// Parses a primary expression optionally followed by a parenthesised,
/// comma-separated argument list.
///
/// With an argument list present the result is `ExpressionKind::Call`, whose
/// callee is the primary expression boxed with a placeholder id; otherwise the
/// primary expression is returned unchanged.
fn call_expr(text: &str) -> ParseResult<ExpressionKind> {
    use nom::character::complete::char;

    let (text, callee) = primary_expr(text)?;
    let argument_list = delimited(
        char('('),
        separated_list(char(','), invocation_argument),
        char(')'),
    );
    let (text, maybe_arguments) = opt(argument_list)(text)?;
    let kind = match maybe_arguments {
        Some(arguments) => {
            let f = bx!(Expression { id: ItemId::new(0), kind: callee, type_anno: None });
            ExpressionKind::Call { f, arguments }
        }
        None => callee,
    };
    Ok((text, kind))
}
/// Parses an optional prefix operator followed by a call expression.
///
/// Whitespace is skipped before and after the (optional) operator. When an
/// operator is present the operand is wrapped in
/// `ExpressionKind::PrefixExp`; otherwise the operand is returned as is.
fn prefix_expr(text: &str) -> ParseResult<ExpressionKind> {
    let (text, maybe_op) = delimited(space0, opt(prefix_op), space0)(text)?;
    let (text, operand) = call_expr(text)?;
    let kind = if let Some(op) = maybe_op {
        let boxed_operand = Box::new(Expression {
            id: ItemId::new(0),
            kind: operand,
            type_anno: None,
        });
        ExpressionKind::PrefixExp(op, boxed_operand)
    } else {
        operand
    };
    Ok((text, kind))
}
// this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
/// Parses a sequence of prefix expressions joined by binary operators into
/// nested `ExpressionKind::BinExp` nodes, honouring `BinOp::precedence()`.
///
/// Parsing starts at `BinOp::min_precedence()`. Every `Expression` built here
/// gets the placeholder id `ItemId::new(0)`.
fn precedence_expr(text: &str) -> ParseResult<ExpressionKind> {
// `precedence` is the binding power of the operator to our left; only
// operators that bind strictly tighter than it are consumed at this level.
fn inner_precedence_expr(input: &str, precedence: i32) -> ParseResult<ExpressionKind> {
// `outer_rest` always points just past the last fully-parsed operand.
let (mut outer_rest, mut lhs) = prefix_expr(input)?;
loop {
// Look ahead speculatively: `rest` is advanced past whitespace and the
// operator, but `outer_rest` is not touched until the right-hand side parses.
let (rest, _) = space0(outer_rest)?;
let (rest, maybe_binop) = opt(parse_binop)(rest)?;
let (new_precedence, binop) = match maybe_binop {
Some(binop) => (binop.precedence(), binop),
// No operator follows: the expression ends here.
None => break,
};
// An operator that does not bind tighter than the caller's is left
// unconsumed for the caller's loop; with `>=`, operators of equal
// precedence therefore group to the left.
if precedence >= new_precedence {
break;
}
let (rest, _) = space0(rest)?;
// The right operand absorbs any following operators that bind tighter
// than `binop`.
let (rest, rhs) = inner_precedence_expr(rest, new_precedence)?;
// Commit the consumed input and fold the pair into the new left operand.
outer_rest = rest;
lhs = ExpressionKind::BinExp(binop,
bx!(Expression::new(ItemId::new(0), lhs)),
bx!(Expression::new(ItemId::new(0), rhs))
);
}
// On `break`, any whitespace/operator examined in the last iteration is
// not part of `outer_rest`, so it remains in the unparsed input.
Ok((outer_rest, lhs))
}
context("Precedence expression",
|input| inner_precedence_expr(input, BinOp::min_precedence())
)(text)
}
/// Entry point for parsing an expression; currently a direct alias for
/// `precedence_expr`.
fn expression_kind(text: &str) -> ParseResult<ExpressionKind> {
    precedence_expr(text)
}
/// Small parser built with the `combine` crate instead of nom.
mod thing {
    // NOTE(review): neither of the two `crate::` imports is referenced by name
    // in this module — confirm whether they are still needed.
    use crate::ast::*;
    use crate::builtin::Builtin;
    use combine::parser::range::{range, take_while1};
    use combine::parser::repeat::sep_by;
    use combine::*;

    /// Parses `input` as runs of alphabetic characters separated by `", "`
    /// and returns the `Debug` rendering of the `easy_parse` result (the
    /// parsed words plus the unconsumed input, or the error).
    pub fn perform_parsing(input: &str) -> String {
        let word = take_while1(|c: char| c.is_alphabetic());
        let mut word_list = sep_by(word, range(", "));
        let outcome: Result<(Vec<&str>, &str), _> = word_list.easy_parse(input);
        format!("{:?}", outcome)
    }
}
/// Public parsing entry point.
///
/// Currently delegates to `thing::perform_parsing` and always returns `Ok`
/// with that function's debug string; the nom-based `expression_kind` parser
/// is not wired in yet.
pub fn perform_parsing(input: &str) -> Result<String, String> {
    // let output = expression_kind(input)
    Ok(thing::perform_parsing(input))
}