rust-parser-combinator/src/lib.rs

282 lines
6.8 KiB
Rust
Raw Normal View History

2022-10-15 23:36:04 -07:00
#![feature(assert_matches)]
#![allow(dead_code)] //TODO eventually turn this off
2022-10-16 00:30:06 -07:00
use std::rc::Rc;
2022-10-15 23:36:04 -07:00
/// Outcome of running a parser: on success the produced output paired with the
/// input that is left over, as `(output, rest)`; on failure an error of type `E`.
type ParseResult<I, O, E> = Result<(O, I), E>;
trait Parser<I, O, E> {
fn parse(&self, input: I) -> ParseResult<I, O, E>;
2022-10-16 00:54:41 -07:00
fn map<'a, F, O2>(self, map_fn: F) -> BoxedParser<'a, I, O2, E>
where
Self: Sized + 'a,
I: 'a,
E: 'a,
O: 'a,
O2: 'a,
F: Fn(O) -> O2 + 'a,
{
BoxedParser::new(map(self, map_fn))
}
2022-10-16 01:10:48 -07:00
2022-10-16 01:29:48 -07:00
fn to<'a, O2>(self, item: O2) -> BoxedParser<'a, I, O2, E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: Clone + 'a,
E: 'a,
{
self.map(move |_| item.clone())
}
2022-10-16 01:10:48 -07:00
fn then<'a, P, O2>(self, next_parser: P) -> BoxedParser<'a, I, (O, O2), E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: 'a,
E: 'a,
P: Parser<I, O2, E> + 'a,
{
BoxedParser::new(seq(self, next_parser))
}
2022-10-15 23:36:04 -07:00
}
2022-10-16 00:54:41 -07:00
struct BoxedParser<'a, I, O, E> {
inner: Box<dyn Parser<I, O, E> + 'a>,
2022-10-16 00:44:47 -07:00
}
2022-10-16 00:54:41 -07:00
impl<'a, I, O, E> BoxedParser<'a, I, O, E> {
2022-10-16 00:44:47 -07:00
fn new<P>(inner: P) -> Self
where
2022-10-16 00:54:41 -07:00
P: Parser<I, O, E> + 'a,
2022-10-16 00:44:47 -07:00
{
BoxedParser {
inner: Box::new(inner),
}
}
}
2022-10-16 00:54:41 -07:00
impl<'a, I, O, E> Parser<I, O, E> for BoxedParser<'a, I, O, E> {
2022-10-16 00:44:47 -07:00
fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.inner.parse(input)
}
}
2022-10-15 23:41:22 -07:00
/// Any function or closure of shape `Fn(I) -> ParseResult<I, O, E>` is a
/// parser; parsing simply calls it.
impl<I, O, E, F> Parser<I, O, E> for F
where
    F: Fn(I) -> ParseResult<I, O, E>,
{
    /// Calls the underlying function with `input`.
    fn parse(&self, input: I) -> ParseResult<I, O, E> {
        self(input)
    }
}
2022-10-16 00:30:06 -07:00
/// A reference-counted handle to a parser is itself a parser, so one parser
/// can be shared between several combinators.
impl<I, O, E, T> Parser<I, O, E> for Rc<T>
where
    T: Parser<I, O, E>,
{
    /// Forwards `input` to the parser behind the `Rc`.
    fn parse(&self, input: I) -> ParseResult<I, O, E> {
        let shared: &T = &**self;
        shared.parse(input)
    }
}
2022-10-15 23:41:22 -07:00
fn literal(expected: &'static str) -> impl Fn(&str) -> ParseResult<&str, &str, &str> {
2022-10-15 23:36:04 -07:00
move |input| match input.get(0..expected.len()) {
2022-10-15 23:41:22 -07:00
Some(next) if next == expected => Ok((expected, &input[expected.len()..])),
_ => Err(input),
2022-10-15 23:36:04 -07:00
}
2022-10-10 00:13:39 -07:00
}
2022-10-15 23:41:22 -07:00
/// Wraps `parser` so that its output is passed through `map_fn`.
///
/// The remaining input is forwarded unchanged, and a failure of `parser` is
/// returned as-is without calling `map_fn`.
fn map<P, F, I, O1, O2, E>(parser: P, map_fn: F) -> impl Parser<I, O2, E>
where
    P: Parser<I, O1, E>,
    F: Fn(O1) -> O2,
{
    move |input| {
        parser
            .parse(input)
            .map(|(result, rest)| (map_fn(result), rest))
    }
}
/// Chains two parsers: `parser1` runs first and `parser2` runs on whatever
/// input `parser1` left behind.
///
/// Succeeds with both outputs as a tuple and the input remaining after
/// `parser2`. The first failure encountered is returned unchanged.
fn seq<P1, P2, I, O1, O2, E>(parser1: P1, parser2: P2) -> impl Parser<I, (O1, O2), E>
where
    P1: Parser<I, O1, E>,
    P2: Parser<I, O2, E>,
{
    move |input| match parser1.parse(input) {
        Ok((first, after_first)) => match parser2.parse(after_first) {
            Ok((second, after_second)) => Ok(((first, second), after_second)),
            Err(err) => Err(err),
        },
        Err(err) => Err(err),
    }
}
2022-10-16 00:39:41 -07:00
/// Wraps `parser` so that it only succeeds when `pred_fn` accepts the value
/// it produced.
///
/// * If `parser` succeeds and `pred_fn` returns `true`, the value and the
///   remaining input are returned unchanged.
/// * If `parser` succeeds but `pred_fn` returns `false`, the error is the
///   input the parse started from, so the error points at the rejected value
///   rather than past it.
/// * If `parser` fails, its error is returned unchanged.
///
/// `I: Copy` is needed to keep the starting input around for the rejection
/// case; `zero_or_more` and `choice` place the same bound on their input.
fn pred<P, F, I, O>(parser: P, pred_fn: F) -> impl Parser<I, O, I>
where
    P: Parser<I, O, I>,
    F: Fn(&O) -> bool,
    I: Copy,
{
    move |input| match parser.parse(input) {
        Ok((result, rest)) if pred_fn(&result) => Ok((result, rest)),
        // Parsed but rejected: nothing counts as consumed, so report the
        // position where this parser was started.
        Ok(_) => Err(input),
        Err(err) => Err(err),
    }
}
2022-10-16 00:30:06 -07:00
fn zero_or_more<P, I, O>(parser: P) -> impl Parser<I, Vec<O>, I>
2022-10-16 00:23:01 -07:00
where
P: Parser<I, O, I>,
I: Copy,
{
move |mut input| {
let mut results = Vec::new();
while let Ok((item, rest)) = parser.parse(input) {
results.push(item);
input = rest;
}
Ok((results, input))
}
}
2022-10-16 00:30:06 -07:00
/// Applies `parser` at least once and then as many more times as it keeps
/// succeeding, collecting all outputs in order.
///
/// Fails with the error of the first attempt when `parser` does not match
/// even once.
fn one_or_more<P, I, O>(parser: P) -> impl Parser<I, Vec<O>, I>
where
    P: Parser<I, O, I>,
    I: Copy,
{
    // The same parser is needed for the mandatory first match and for the
    // repeated tail, so it is shared through an `Rc`.
    let shared = std::rc::Rc::new(parser);
    let head = std::rc::Rc::clone(&shared);
    let tail = zero_or_more(shared);
    map(seq(head, tail), |(first, rest)| {
        std::iter::once(first).chain(rest).collect::<Vec<_>>()
    })
}
2022-10-15 23:58:05 -07:00
/// Parses a standard identifier in a programming language
fn identifier(input: &str) -> ParseResult<&str, String, &str> {
let mut chars = input.chars();
let mut buf = String::new();
match chars.next() {
Some(ch) if ch.is_alphabetic() => buf.push(ch),
_ => return Err(input),
}
while let Some(next) = chars.next() {
if next.is_alphanumeric() {
buf.push(next);
} else {
break;
}
}
let next_index = buf.len();
Ok((buf, &input[next_index..]))
2022-10-15 23:41:22 -07:00
}
2022-10-16 00:39:41 -07:00
/// Parses a single character of any kind.
///
/// Yields the first character and the input after it, or returns the
/// (empty) input as the error when there is nothing left to read.
fn any_char(input: &str) -> ParseResult<&str, char, &str> {
    let mut remaining = input.chars();
    match remaining.next() {
        // After `next`, the iterator views exactly the text that follows the
        // character just taken.
        Some(first) => Ok((first, remaining.as_str())),
        None => Err(input),
    }
}
2022-10-16 01:29:48 -07:00
/// Tries `parser1` and, only if it fails, tries `parser2` on the same input.
///
/// The result of the first parser to succeed is returned. When both fail,
/// the error of `parser2` is returned and the error of `parser1` is dropped.
fn choice<P1, P2, I, O, E>(parser1: P1, parser2: P2) -> impl Parser<I, O, E>
where
    P1: Parser<I, O, E>,
    P2: Parser<I, O, E>,
    I: Copy,
{
    move |input| {
        parser1
            .parse(input)
            .or_else(|_first_error| parser2.parse(input))
    }
}
2022-10-10 00:13:39 -07:00
#[cfg(test)]
mod tests {
    use super::*;
    use std::assert_matches::assert_matches;

    /// A literal parser consumes exactly its text and returns the rest.
    #[test]
    fn test_parsing() {
        let output = literal("a")("a yolo");
        assert_matches!(output.unwrap(), ("a", " yolo"));
    }

    /// Identifiers must start with a letter and may continue with digits.
    #[test]
    fn test_identifier() {
        assert_matches!(identifier("bongo1beans"), Ok((s, "")) if s == "bongo1beans");
        assert_matches!(identifier("2bongo1beans"), Err("2bongo1beans"));
    }

    /// `Parser::map` transforms the output and keeps the remaining input.
    #[test]
    fn test_map() {
        let lit_a = literal("a");
        let output = lit_a.map(|s| s.to_uppercase()).parse("a yolo");
        assert_matches!(output.unwrap(), (s, " yolo") if s == "A");
    }

    /// Sequencing works both through the free function and through `then`.
    #[test]
    fn test_seq() {
        let p = seq(identifier, seq(literal(" "), literal("ruts")));
        assert_matches!(p.parse("fort1 ruts"), Ok((r, "")) if r.0 == "fort1" && r.1 == (" ", "ruts"));

        let p = identifier.then(literal(" ")).then(literal("ruts"));
        assert_matches!(p.parse("fort1 ruts"), Ok((r, "")) if r.0.0 == "fort1" && r.0.1 == " " && r.1 == "ruts");
    }

    /// Repetition collects every match and stops at the first mismatch.
    #[test]
    fn test_one_or_more() {
        let p = one_or_more(literal("bongo "));

        let input = "bongo bongo bongo bongo bongo ";
        assert_matches!(p.parse(input), Ok((v, "")) if v.len() == 5);

        let input = "bongo ecks";
        assert_matches!(p.parse(input), Ok((v, "ecks")) if v.len() == 1);
    }

    /// A predicate that accepts the parsed value lets the result through.
    #[test]
    fn test_pred() {
        let p = pred(any_char, |c| *c == 'f');
        assert_eq!(p.parse("frog"), Ok(('f', "rog")));
    }

    /// The first alternative wins when it matches.
    #[test]
    fn test_choice() {
        let p = choice(literal("gnostika").to(1), one_or_more(literal(" ")).to(2));
        assert_eq!(p.parse("gnostika twentynine"), Ok((1, " twentynine")));
    }

    /*
     * JSON BNF
     * <JSON>     ::= <value>
     * <value>    ::= <object> | <array> | <boolean> | <string> | <number> | <null>
     * <array>    ::= "[" [<value>] {"," <value>}* "]"
     * <object>   ::= "{" [<property>] {"," <property>}* "}"
     * <property> ::= <string> ":" <value>
     */
    /// First step towards the grammar above: the three keyword values.
    #[test]
    fn parse_json() {
        let json_null = literal("null");
        let json_true = literal("true");
        let json_false = literal("false");

        let json_value = choice(json_null, choice(json_true, json_false));

        assert_matches!(json_value.parse("true"), Ok(("true", "")));
    }
}