Compare commits

...

81 Commits

Author SHA1 Message Date
Greg Shuflin 1e91bf4efd Add benchmarking code for json parser 2023-03-10 01:16:06 -08:00
Greg Shuflin b86cbfd8ff Move json into testutil module 2023-03-10 01:12:46 -08:00
Greg Shuflin d132d8dbdd Add criterion crate + basic code 2023-03-10 00:45:04 -08:00
Greg Shuflin 5526ce7bd1 More test parameterization 2023-02-26 04:41:03 -08:00
Greg Shuflin 638139b7da Start using rstest 2023-02-26 04:29:15 -08:00
Greg Shuflin 0d69aa81c1 json_number test working 2023-02-26 03:52:19 -08:00
Greg Shuflin dfb151e2a3 More work on seq representation 2023-02-25 14:48:44 -08:00
Greg Shuflin 9a3745d25c implement representation for json_bool 2023-02-25 03:27:32 -08:00
Greg Shuflin 5aca3912fc Start making representation work 2023-02-25 03:00:29 -08:00
Greg Shuflin 377d515d40 cargo fmt 2023-02-25 02:50:43 -08:00
Greg Shuflin f15488194c Remove some uses of closures in test 2023-02-25 02:50:20 -08:00
Greg Shuflin 116e2fc361 Add readme blurb 2023-02-03 16:19:40 -08:00
Greg Shuflin ffa282e89d Add readme 2023-02-03 16:12:55 -08:00
Greg Shuflin 2fd8b1be16 Representation work 2023-01-22 14:26:54 -08:00
Greg Shuflin 864246c24a Rename test 2023-01-22 14:26:54 -08:00
Greg Shuflin 172275a5f6 add parse_string test 2022-12-05 00:46:53 -08:00
Greg Shuflin daab699f1f Proptest: doesn't crash 2022-12-04 00:22:33 -08:00
Greg Shuflin 50ca820cfe Add proptest 2022-12-04 00:16:14 -08:00
Greg Shuflin d735bbab3d Add arbitrary crate
cf. https://www.greyblake.com/blog/property-based-testing-in-rust-with-arbitrary/
2022-11-11 16:41:53 -08:00
Greg Shuflin 6ac19c8989 Make parser input more complex 2022-10-23 00:41:51 -07:00
Greg Shuflin f0e1d2b045 Clippy lints 2022-10-23 00:35:24 -07:00
Greg Shuflin 3f86c08dc1 Move json into integration test 2022-10-23 00:31:59 -07:00
Greg Shuflin 4f807b991b Remove current iteration of bnf code 2022-10-23 00:25:53 -07:00
Greg Shuflin 502bfa9587 Don't special-case choices2 2022-10-22 22:44:36 -07:00
Greg Shuflin 60141dd766 more bnf testing 2022-10-22 17:07:19 -07:00
Greg Shuflin a2845cecc8 Don't need static lifetime 2022-10-22 16:13:03 -07:00
Greg Shuflin 0b51aa5073 Start working on Bnf 2022-10-22 02:05:31 -07:00
Greg Shuflin 907e15a624 Add named parser 2022-10-21 22:45:42 -07:00
Greg Shuflin 035afbf22f Refactor parser module 2022-10-21 22:22:21 -07:00
Greg Shuflin 73845224d5 Start working on bnf() 2022-10-21 21:56:08 -07:00
Greg Shuflin a049236300 reorganize combinators 2022-10-21 19:34:27 -07:00
Greg Shuflin d14b470530 Reorganize module structure 2022-10-21 19:26:00 -07:00
Greg Shuflin cde41e6eda Add real-world json as a test case 2022-10-21 19:11:40 -07:00
Greg Shuflin 74b6a1e410 json object 2022-10-21 18:58:00 -07:00
Greg Shuflin c5f971f7ff Surrounded by combinator 2022-10-21 17:38:08 -07:00
Greg Shuflin 92155a8f36 Work around infinite recursion with closure 2022-10-21 17:06:33 -07:00
Greg Shuflin 9198fdd407 Use ParserInput trait 2022-10-21 16:19:41 -07:00
Greg Shuflin a8370ad216 Split up tests 2022-10-21 15:53:26 -07:00
Greg Shuflin d2cb787a13 Add test for recursive value in array
Currently broken, need to fix
2022-10-21 00:00:23 -07:00
Greg Shuflin 6633d10af0 choice work 2022-10-20 23:47:06 -07:00
Greg Shuflin bdaf712a25 need choice for 6-tuple 2022-10-20 23:39:51 -07:00
Greg Shuflin b7db411671 json array 2022-10-20 23:37:46 -07:00
Greg Shuflin 7a832b6ba2 more tests for separated_by 2022-10-20 23:34:21 -07:00
Greg Shuflin 5705876477 Separated by 2022-10-20 23:29:06 -07:00
Greg Shuflin d624a54fb0 Start working on DelimitedBy 2022-10-20 18:38:23 -07:00
Greg Shuflin ed4567d881 repeated parse() refactor 2022-10-20 18:14:28 -07:00
Greg Shuflin 282398c7b1 Fix string bug; delimited 2022-10-20 18:10:13 -07:00
Greg Shuflin e279fe314f starting json array 2022-10-20 17:55:46 -07:00
Greg Shuflin 03ff159c95 Use functions not closures 2022-10-19 22:36:10 -07:00
Greg Shuflin e6ce2cf34d then_ignore, ignore_then 2022-10-19 22:23:52 -07:00
Greg Shuflin 0d4ea42678 Just use repeated combinator 2022-10-19 22:06:10 -07:00
Greg Shuflin 567ea60642 repeated combinator 2022-10-19 19:42:29 -07:00
Greg Shuflin 26f813ee16 tests for json 2022-10-17 01:42:42 -07:00
Greg Shuflin 32508b25f5 json numbers 2022-10-17 01:26:33 -07:00
Greg Shuflin 64a50d5bf3 incremental tests 2022-10-17 00:49:13 -07:00
Greg Shuflin a02accf08a optional combinator 2022-10-17 00:47:19 -07:00
Greg Shuflin 8b010811e3 one_of combinator 2022-10-17 00:37:55 -07:00
Greg Shuflin dd9c05ccd1 More JSON 2022-10-16 21:36:23 -07:00
Greg Shuflin 2dc754c294 More choice impls 2022-10-16 20:16:03 -07:00
Greg Shuflin 4456734134 choice function 2022-10-16 20:07:45 -07:00
Greg Shuflin eaafdaa777 choice module 2022-10-16 19:21:43 -07:00
Greg Shuflin e584668efc combinators module 2022-10-16 19:16:21 -07:00
Greg Shuflin 5e94769ed2 primitives mod 2022-10-16 19:10:32 -07:00
Greg Shuflin 364ddeac00 Put Parser in separate module 2022-10-16 18:57:10 -07:00
Greg Shuflin a9f2d4d771 seq for tuples of lengths 2 - 5 2022-10-16 18:51:14 -07:00
Greg Shuflin 750cc23a29 Adjust visibility 2022-10-16 17:35:30 -07:00
Greg Shuflin 95a6d935e4 Rename seq -> tuple2 2022-10-16 17:34:21 -07:00
Greg Shuflin c5b351e8ee Move seq into separate module 2022-10-16 17:33:10 -07:00
Greg Shuflin c6b3f51e42 Adjust one_or_more 2022-10-16 17:25:12 -07:00
Greg Shuflin ec1406a057 Json type 2022-10-16 01:42:03 -07:00
Greg Shuflin 4e711ee898 Add BNF generation 2022-10-16 01:37:51 -07:00
Greg Shuflin 22d0aa73de Start json test 2022-10-16 01:36:20 -07:00
Greg Shuflin 8002b83a86 choice combinator 2022-10-16 01:29:48 -07:00
Greg Shuflin 00d866f2a1 then method 2022-10-16 01:10:48 -07:00
Greg Shuflin 981b5c08d0 Make map a method 2022-10-16 00:54:41 -07:00
Greg Shuflin d707249904 Boxed parser 2022-10-16 00:48:19 -07:00
Greg Shuflin 0c6c59da07 Predicate 2022-10-16 00:39:41 -07:00
Greg Shuflin 870c9beb54 zero or more 2022-10-16 00:30:34 -07:00
Greg Shuflin 4eda356fd7 one or more 2022-10-16 00:23:01 -07:00
Greg Shuflin 0ee53b8f00 Sequence 2022-10-16 00:12:39 -07:00
Greg Shuflin 3bc608d4a7 Map 2022-10-15 23:41:22 -07:00
20 changed files with 1458 additions and 33 deletions

View File

@ -5,4 +5,21 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
testutil = []
[dependencies]
arbitrary = "1.2.0"
proptest = "1.0.0"
[dev-dependencies]
criterion = "0.4.0"
rstest = "0.16.0"
# see https://github.com/rust-lang/cargo/issues/2911#issuecomment-749580481
parser-combinator = { path = ".", features = ["testutil"] }
[[bench]]
name = "json-benchmark"
harness = false

10
README.md Normal file
View File

@ -0,0 +1,10 @@
# Rust Parser Combinator
This is a super-basic Rust parser combinator library I wrote mostly
as an exercise for myself. Inspired by [nom](https://github.com/rust-bakery/nom)
and [chumsky](https://github.com/zesterer/chumsky)
## Ideas for future work
* See if some of the ideas in [Efficient Parsing with Parser Combinators](https://research.rug.nl/en/publications/efficient-parsing-with-parser-combinators)
can be incorporated here.

14
benches/json-benchmark.rs Normal file
View File

@ -0,0 +1,14 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use parser_combinator::testutil::json_object;
use parser_combinator::Parser;
pub fn criterion_benchmark(c: &mut Criterion) {
let test_json = include_str!("../tests/joplin-cfg.json");
c.bench_function("parse_json", |b| {
b.iter(|| json_object().parse(black_box(test_json)))
});
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

198
src/choice/mod.rs Normal file
View File

@ -0,0 +1,198 @@
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
pub fn choice2<P1, P2, I, O, E>(parser1: P1, parser2: P2) -> impl Parser<I, O, E>
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
I: ParserInput + Clone,
{
choice((parser1, parser2))
}
pub fn choice<C, I, O, E>(choices: C) -> impl Parser<I, O, E>
where
C: Choice<I, O, E>,
I: ParserInput + Clone,
{
let rep = choices.representation();
(move |input| choices.parse(input), rep)
}
pub trait Choice<I: Clone, O, E> {
fn parse(&self, input: I) -> ParseResult<I, O, E>;
fn representation(&self) -> Representation;
}
impl<I, O, E, P1, P2> Choice<I, O, E> for (P1, P2)
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
I: ParserInput + Clone,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1];
choice_loop(input, parsers)
}
fn representation(&self) -> Representation {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1];
repr_loop(parsers)
}
}
impl<I, O, E, P1, P2, P3> Choice<I, O, E> for (P1, P2, P3)
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
P3: Parser<I, O, E>,
I: ParserInput + Clone,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1, &self.2];
choice_loop(input, parsers)
}
fn representation(&self) -> Representation {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1, &self.2];
repr_loop(parsers)
}
}
impl<I, O, E, P1, P2, P3, P4> Choice<I, O, E> for (P1, P2, P3, P4)
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
P3: Parser<I, O, E>,
P4: Parser<I, O, E>,
I: ParserInput + Clone,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1, &self.2, &self.3];
choice_loop(input, parsers)
}
fn representation(&self) -> Representation {
let parsers = vec![&self.0 as &dyn Parser<I, O, E>, &self.1, &self.2, &self.3];
repr_loop(parsers)
}
}
impl<I, O, E, P1, P2, P3, P4, P5> Choice<I, O, E> for (P1, P2, P3, P4, P5)
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
P3: Parser<I, O, E>,
P4: Parser<I, O, E>,
P5: Parser<I, O, E>,
I: ParserInput + Clone,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
let parsers = vec![
&self.0 as &dyn Parser<I, O, E>,
&self.1,
&self.2,
&self.3,
&self.4,
];
choice_loop(input, parsers)
}
fn representation(&self) -> Representation {
let parsers = vec![
&self.0 as &dyn Parser<I, O, E>,
&self.1,
&self.2,
&self.3,
&self.4,
];
repr_loop(parsers)
}
}
impl<I, O, E, P1, P2, P3, P4, P5, P6> Choice<I, O, E> for (P1, P2, P3, P4, P5, P6)
where
P1: Parser<I, O, E>,
P2: Parser<I, O, E>,
P3: Parser<I, O, E>,
P4: Parser<I, O, E>,
P5: Parser<I, O, E>,
P6: Parser<I, O, E>,
I: ParserInput + Clone,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
let parsers = vec![
&self.0 as &dyn Parser<I, O, E>,
&self.1,
&self.2,
&self.3,
&self.4,
&self.5,
];
choice_loop(input, parsers)
}
fn representation(&self) -> Representation {
let parsers = vec![
&self.0 as &dyn Parser<I, O, E>,
&self.1,
&self.2,
&self.3,
&self.4,
&self.5,
];
repr_loop(parsers)
}
}
fn choice_loop<I, O, E>(input: I, parsers: Vec<&dyn Parser<I, O, E>>) -> ParseResult<I, O, E>
where
I: ParserInput + Clone,
{
//TODO need a more principled way to return an error when no choices work
let mut err = None;
for parser in parsers.iter() {
match parser.parse(input.clone()) {
Ok(result) => return Ok(result),
Err(e) => {
err = Some(e);
}
}
}
Err(err.unwrap())
}
fn repr_loop<I, O, E>(parsers: Vec<&dyn Parser<I, O, E>>) -> Representation
where
I: ParserInput + Clone,
{
let mut iter = parsers.iter().map(|p| p.representation());
Representation::from_choice(&mut iter)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::combinators::repeated;
use crate::primitives::literal;
#[test]
fn test_choice() {
let p = choice2(
literal("gnostika").to(1),
repeated(literal(" ")).at_least(1).to(2),
);
assert_eq!(p.parse("gnostika twentynine"), Ok((1, " twentynine")));
}
#[test]
fn test_several_choices() {
let p = choice((
literal("a").to(1),
literal("q").to(10),
repeated(literal("chutney")).to(200),
literal("banana").to(10000),
));
assert_eq!(p.parse("q drugs").unwrap(), (10, " drugs"));
}
}

16
src/combinators/map.rs Normal file
View File

@ -0,0 +1,16 @@
use crate::parser::{Parser, ParserInput};
pub fn map<P, F, I, O1, O2, E>(parser: P, map_fn: F) -> impl Parser<I, O2, E>
where
I: ParserInput,
P: Parser<I, O1, E>,
F: Fn(O1) -> O2,
{
let rep = parser.representation();
let p = move |input| {
parser
.parse(input)
.map(|(result, rest)| (map_fn(result), rest))
};
(p, rep)
}

66
src/combinators/mod.rs Normal file
View File

@ -0,0 +1,66 @@
mod map;
mod optional;
mod repeated;
mod separated_by;
pub use map::map;
pub use optional::optional;
pub use repeated::repeated;
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::Parser;
use crate::primitives::literal;
#[test]
fn test_map() {
let lit_a = literal("a");
let output = lit_a.map(|s| s.to_uppercase()).parse("a yolo");
assert_eq!(output.unwrap(), ("A".to_string(), " yolo"));
}
#[test]
fn test_one_or_more() {
let p = repeated(literal("bongo ")).at_least(1);
let input = "bongo bongo bongo bongo bongo ";
let (output, rest) = p.parse(input).unwrap();
assert_eq!(rest, "");
assert_eq!(output.len(), 5);
let (output, rest) = p.parse("bongo ecks").unwrap();
assert_eq!(output.len(), 1);
assert_eq!(rest, "ecks");
}
#[test]
fn test_separated_by() {
let p = repeated(literal("garb").to(20))
.separated_by(repeated(literal(" ")).at_least(1), false);
assert_eq!(
p.parse("garb garb garb garb").unwrap(),
(vec![20, 20, 20, 20], "")
);
assert!(p.parse("garb garb garb garb ").is_err());
let p =
repeated(literal("garb").to(20)).separated_by(repeated(literal(" ")).at_least(1), true);
assert_eq!(
p.parse("garb garb garb garb").unwrap(),
(vec![20, 20, 20, 20], "")
);
assert_eq!(
p.parse("garb garb garb garb ").unwrap(),
(vec![20, 20, 20, 20], "")
);
assert_eq!(
p.parse("garb garb garb garb q").unwrap(),
(vec![20, 20, 20, 20], "q")
);
}
}

View File

@ -0,0 +1,17 @@
use crate::parser::{Parser, ParserInput, Representation};
pub fn optional<P, I, O, E>(parser: P) -> impl Parser<I, Option<O>, E>
where
P: Parser<I, O, E>,
I: ParserInput + Clone,
{
let rep = Representation::from_choice(
&mut [parser.representation(), Representation::new("ε")].into_iter(),
);
let p = move |input: I| match parser.parse(input.clone()) {
Ok((output, rest)) => Ok((Some(output), rest)),
Err(_e) => Ok((None, input)),
};
(p, rep)
}

View File

@ -0,0 +1,94 @@
use crate::combinators::separated_by::SeparatedBy;
use crate::parser::{BoxedParser, ParseResult, Parser, ParserInput, Representation};
pub fn repeated<'a, P, I, O>(parser: P) -> Repeated<'a, I, O>
where
P: Parser<I, O, I> + 'a,
I: ParserInput + Clone + 'a,
{
Repeated {
inner_parser: BoxedParser::new(parser),
at_least: None,
at_most: None,
}
}
pub struct Repeated<'a, I, O>
where
I: ParserInput + Clone,
{
pub(super) inner_parser: BoxedParser<'a, I, O, I>,
pub(super) at_least: Option<u16>,
pub(super) at_most: Option<u16>,
}
impl<'a, I, O> Repeated<'a, I, O>
where
I: ParserInput + Clone,
{
pub fn at_least(self, n: u16) -> Self {
Self {
at_least: Some(n),
..self
}
}
pub fn at_most(self, n: u16) -> Self {
Self {
at_most: Some(n),
..self
}
}
pub fn separated_by<D, O2>(self, delimiter: D, allow_trailing: bool) -> SeparatedBy<'a, I, O>
where
D: Parser<I, O2, I> + 'a,
O2: 'a,
I: 'a,
{
SeparatedBy {
inner_repeated: self,
delimiter: BoxedParser::new(delimiter.to(())),
allow_trailing,
}
}
}
impl<'a, I, O> Parser<I, Vec<O>, I> for Repeated<'a, I, O>
where
I: ParserInput + Clone + 'a,
{
fn parse(&self, input: I) -> ParseResult<I, Vec<O>, I> {
let at_least = self.at_least.unwrap_or(0);
let at_most = self.at_most.unwrap_or(u16::MAX);
if at_most == 0 {
return Ok((vec![], input));
}
let mut results = Vec::new();
let mut count: u16 = 0;
let mut further_input = input.clone();
while let Ok((item, rest)) = self.inner_parser.parse(further_input.clone()) {
results.push(item);
further_input = rest;
count += 1;
if count >= at_most {
break;
}
}
if count < at_least {
return Err(input);
}
Ok((results, further_input))
}
fn representation(&self) -> Representation {
Representation::repeated(
self.inner_parser.representation(),
self.at_least.unwrap_or(0),
self.at_most.unwrap_or(u16::MAX),
)
}
}

View File

@ -0,0 +1,84 @@
use crate::combinators::repeated::Repeated;
use crate::parser::{BoxedParser, ParseResult, Parser, ParserInput, Representation};
pub struct SeparatedBy<'a, I, O>
where
I: ParserInput + Clone,
{
pub(super) inner_repeated: Repeated<'a, I, O>,
pub(super) delimiter: BoxedParser<'a, I, (), I>,
pub(super) allow_trailing: bool,
}
impl<'a, I, O> Parser<I, Vec<O>, I> for SeparatedBy<'a, I, O>
where
I: ParserInput + Clone + 'a,
{
fn representation(&self) -> Representation {
Representation::new("sepby")
}
fn parse(&self, input: I) -> ParseResult<I, Vec<O>, I> {
let at_least = self.inner_repeated.at_least.unwrap_or(0);
let at_most = self.inner_repeated.at_most.unwrap_or(u16::MAX);
let parser = &self.inner_repeated.inner_parser;
let delimiter = &self.delimiter;
if at_most == 0 {
return Ok((vec![], input));
}
let mut results = Vec::new();
let mut count: u16 = 0;
let mut further_input;
match parser.parse(input.clone()) {
Ok((item, rest)) => {
results.push(item);
further_input = rest;
}
Err(_e) => {
if at_least > 0 {
return Err(input);
} else {
return Ok((vec![], input));
}
}
}
loop {
match delimiter.parse(further_input.clone()) {
Ok(((), rest)) => {
further_input = rest;
}
Err(_e) => {
break;
}
}
match parser.parse(further_input.clone()) {
Ok((item, rest)) => {
results.push(item);
further_input = rest;
count += 1;
}
Err(_e) if self.allow_trailing => {
break;
}
Err(e) => {
return Err(e);
}
}
if count >= at_most {
break;
}
}
if count < at_least {
return Err(input);
}
Ok((results, further_input))
}
}

View File

@ -1,35 +1,10 @@
#![feature(assert_matches)]
#![allow(dead_code)] //TODO eventually turn this off
pub mod choice;
pub mod combinators;
mod parser;
pub mod primitives;
pub mod sequence;
#[cfg(feature = "testutil")]
pub mod testutil;
type ParseResult<I, O, E> = Result<(O, I), E>;
trait Parser<I, O, E> {
fn parse(&self, input: I) -> ParseResult<I, O, E>;
}
impl<I, O, E, F> Parser<I, O, E> for F where F: Fn(I) -> ParseResult<I, O, E> {
fn parse(&self, input: I) -> ParseResult<I, O, E> {
self(input)
}
}
fn literal(expected: &'static str) -> impl Fn(&str) -> ParseResult<&str, (), &str> {
move |input| match input.get(0..expected.len()) {
Some(next) if next == expected =>
Ok(((), &input[expected.len()..])),
_ => Err(input)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::assert_matches::assert_matches;
#[test]
fn parsing() {
let output = literal("a")("a yolo");
assert_matches!(output.unwrap(), ((), " yolo"));
}
}
pub use parser::{ParseResult, Parser, ParserInput, Representation};

View File

@ -0,0 +1,38 @@
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
pub struct BoxedParser<'a, I, O, E>
where
I: ParserInput,
{
inner: Box<dyn Parser<I, O, E> + 'a>,
}
impl<'a, I, O, E> BoxedParser<'a, I, O, E>
where
I: ParserInput,
{
pub(crate) fn new<P>(inner: P) -> Self
where
P: Parser<I, O, E> + 'a,
{
BoxedParser {
inner: Box::new(inner),
}
}
}
impl<'a, I: ParserInput, O, E> Parser<I, O, E> for BoxedParser<'a, I, O, E> {
fn representation(&self) -> Representation {
self.inner.representation()
}
fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.inner.parse(input)
}
fn boxed<'b>(self) -> BoxedParser<'b, I, O, E>
where
Self: Sized + 'b,
{
self
}
}

179
src/parser/mod.rs Normal file
View File

@ -0,0 +1,179 @@
mod boxed_parser;
mod named_parser;
mod parser_input;
mod representation;
use std::rc::Rc;
pub use boxed_parser::BoxedParser;
pub use named_parser::NamedParser;
pub use parser_input::ParserInput;
pub use representation::Representation;
pub type ParseResult<I, O, E> = Result<(O, I), E>;
pub trait Parser<I, O, E>
where
I: ParserInput,
{
fn parse(&self, input: I) -> ParseResult<I, O, E>;
fn representation(&self) -> Representation;
fn boxed<'a>(self) -> BoxedParser<'a, I, O, E>
where
Self: Sized + 'a,
{
BoxedParser::new(self)
}
fn map<'a, F, O2>(self, map_fn: F) -> BoxedParser<'a, I, O2, E>
where
Self: Sized + 'a,
I: 'a,
E: 'a,
O: 'a,
O2: 'a,
F: Fn(O) -> O2 + 'a,
{
crate::combinators::map(self, map_fn).boxed()
}
fn to<'a, O2>(self, item: O2) -> BoxedParser<'a, I, O2, E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: Clone + 'a,
E: 'a,
{
self.map(move |_| item.clone())
}
fn then<'a, P, O2>(self, next_parser: P) -> BoxedParser<'a, I, (O, O2), E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: 'a,
E: 'a,
P: Parser<I, O2, E> + 'a,
{
crate::sequence::tuple2(self, next_parser).boxed()
}
fn ignore_then<'a, P, O2>(self, next_parser: P) -> BoxedParser<'a, I, O2, E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: 'a,
E: 'a,
P: Parser<I, O2, E> + 'a,
{
crate::sequence::tuple2(self, next_parser).map(|(_, next_output)| next_output)
}
fn then_ignore<'a, P, O2>(self, next_parser: P) -> BoxedParser<'a, I, O, E>
where
Self: Sized + 'a,
I: 'a,
O: 'a,
O2: 'a,
E: 'a,
P: Parser<I, O2, E> + 'a,
{
crate::sequence::tuple2(self, next_parser).map(|(this_output, _)| this_output)
}
fn delimited<'a, P1, O1, P2, O2>(self, left: P1, right: P2) -> BoxedParser<'a, I, O, E>
where
Self: Sized + 'a,
I: 'a,
O1: 'a,
O2: 'a,
O: 'a,
E: 'a,
P1: Parser<I, O1, E> + 'a,
P2: Parser<I, O2, E> + 'a,
{
crate::sequence::seq((left, self, right)).map(|(_, output, _)| output)
}
fn surrounded_by<'a, P, O1>(self, surrounding: P) -> BoxedParser<'a, I, O, E>
where
Self: Sized + 'a,
I: 'a,
O1: 'a,
O: 'a,
E: 'a,
P: Parser<I, O1, E> + 'a,
{
BoxedParser::new(move |input| {
let p1 = |i| surrounding.parse(i);
let p2 = |i| surrounding.parse(i);
let main = |i| self.parse(i);
crate::sequence::seq((p1, main, p2))
.map(|(_, output, _)| output)
.parse(input)
})
}
fn optional<'a>(self) -> BoxedParser<'a, I, Option<O>, E>
where
I: Clone + 'a,
O: 'a,
E: 'a,
Self: Sized + 'a,
{
crate::combinators::optional(self).boxed()
}
fn named<'a>(self, parser_name: &str) -> NamedParser<'a, I, O, E>
where
Self: Sized + 'a,
I: 'a,
{
NamedParser::new(self.boxed(), parser_name.to_string())
}
}
impl<I: ParserInput, O, E, F> Parser<I, O, E> for F
where
F: Fn(I) -> ParseResult<I, O, E>,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
self(input)
}
fn representation(&self) -> Representation {
Representation::new("NOT IMPL'D")
}
}
impl<I: ParserInput, O, E, F> Parser<I, O, E> for (F, Representation)
where
F: Fn(I) -> ParseResult<I, O, E>,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.0(input)
}
fn representation(&self) -> Representation {
self.1.clone()
}
}
impl<I, O, E, T> Parser<I, O, E> for Rc<T>
where
I: ParserInput,
T: Parser<I, O, E>,
{
fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.as_ref().parse(input)
}
fn representation(&self) -> Representation {
self.as_ref().representation()
}
}

View File

@ -0,0 +1,36 @@
use super::boxed_parser::BoxedParser;
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
pub struct NamedParser<'a, I, O, E>
where
I: ParserInput,
{
inner_parser: BoxedParser<'a, I, O, E>,
name: String,
}
impl<'a, I, O, E> NamedParser<'a, I, O, E>
where
I: ParserInput,
{
pub(super) fn new(inner_parser: BoxedParser<'a, I, O, E>, name: String) -> Self
where
I: 'a,
{
NamedParser { inner_parser, name }
}
pub fn get_name(&'a self) -> &'a str {
self.name.as_ref()
}
}
impl<'a, I: ParserInput, O, E> Parser<I, O, E> for NamedParser<'a, I, O, E> {
fn representation(&self) -> Representation {
self.inner_parser.representation()
}
fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.inner_parser.parse(input)
}
}

View File

@ -0,0 +1,11 @@
pub trait ParserInput: std::fmt::Debug {
type Output;
fn next_token() -> Self::Output;
}
impl ParserInput for &str {
type Output = ();
fn next_token() -> Self::Output {
()
}
}

View File

@ -0,0 +1,66 @@
#[derive(Debug, Clone, PartialEq)]
pub struct Representation {
val: String,
}
impl Representation {
pub fn new(from: &str) -> Self {
Self {
val: from.to_string(),
}
}
pub(crate) fn from_choice(
choice_parser_reps: &mut impl Iterator<Item = Representation>,
) -> Self {
let mut buf = String::new();
let mut iter = choice_parser_reps.peekable();
loop {
let rep = match iter.next() {
Some(r) => r,
None => break,
};
buf.push_str(&rep.val);
match iter.peek() {
Some(_) => {
buf.push_str(" | ");
}
None => {
break;
}
}
}
Representation::new(&buf)
}
pub(crate) fn from_sequence(
sequence_representations: &mut impl Iterator<Item = Representation>,
) -> Self {
let mut buf = String::new();
let mut iter = sequence_representations.peekable();
loop {
let rep = match iter.next() {
Some(r) => r,
None => break,
};
buf.push_str(&rep.val);
match iter.peek() {
Some(_) => {
buf.push_str(" ");
}
None => {
break;
}
}
}
Representation::new(&buf)
}
// TODO use at_least, at_most
pub(crate) fn repeated(underlying: Representation, at_least: u16, _at_most: u16) -> Self {
let sigil = if at_least == 0 { "*" } else { "+" };
Representation::new(&format!("({}){}", underlying.val, sigil))
}
}

108
src/primitives/mod.rs Normal file
View File

@ -0,0 +1,108 @@
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
pub fn literal_char(expected: char) -> impl Fn(&str) -> ParseResult<&str, char, &str> {
move |input| match input.chars().next() {
Some(ch) if ch == expected => Ok((expected, &input[ch.len_utf8()..])),
_ => Err(input),
}
}
pub fn literal<'a>(expected: &'static str) -> impl Parser<&'a str, &'a str, &'a str> {
println!("literal call expected: {}", expected);
let rep = Representation::new(expected);
let p = move |input: &'a str| match input.get(0..expected.len()) {
Some(next) if next == expected => Ok((expected, &input[expected.len()..])),
_ => Err(input),
};
(p, rep)
}
pub fn any_char(input: &str) -> ParseResult<&str, char, &str> {
match input.chars().next() {
Some(ch) => Ok((ch, &input[ch.len_utf8()..])),
None => Err(input),
}
}
pub fn one_of<'a>(items: &'static str) -> impl Parser<&'a str, &'a str, &'a str> {
let p = move |input: &'a str| {
if let Some(ch) = input.chars().next() {
if items.contains(ch) {
let (first, rest) = input.split_at(1);
return Ok((first, rest));
}
}
Err(input)
};
let mut s = String::new();
for ch in items.chars() {
s.push(ch);
s.push_str(" | ");
}
let rep = Representation::new(&s);
(p, rep)
}
pub fn pred<P, F, I, O>(parser: P, pred_fn: F) -> impl Parser<I, O, I>
where
I: ParserInput,
P: Parser<I, O, I>,
F: Fn(&O) -> bool,
{
let orig_rep = parser.representation();
(
move |input| {
parser.parse(input).and_then(|(result, rest)| {
if pred_fn(&result) {
Ok((result, rest))
} else {
Err(rest)
}
})
},
Representation::new(&format!("{:?} if <PREDICATE>", orig_rep)),
)
}
/// Parses a standard identifier in a programming language
pub fn identifier(input: &str) -> ParseResult<&str, String, &str> {
let mut chars = input.chars();
let mut buf = String::new();
match chars.next() {
Some(ch) if ch.is_alphabetic() => buf.push(ch),
_ => return Err(input),
}
for next in chars {
if next.is_alphanumeric() {
buf.push(next);
} else {
break;
}
}
let next_index = buf.len();
Ok((buf, &input[next_index..]))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_identifier() {
assert_eq!(
identifier("bongo1beans").unwrap(),
(("bongo1beans".to_string(), ""))
);
assert_eq!(identifier("2bongo1beans"), Err("2bongo1beans"));
}
#[test]
fn test_pred() {
let p = pred(any_char, |c| *c == 'f');
assert_eq!(p.parse("frog"), Ok(('f', "rog")));
}
}

195
src/sequence/mod.rs Normal file
View File

@ -0,0 +1,195 @@
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
pub fn tuple2<P1, P2, I, O1, O2, E>(parser1: P1, parser2: P2) -> impl Parser<I, (O1, O2), E>
where
I: ParserInput,
P1: Parser<I, O1, E>,
P2: Parser<I, O2, E>,
{
seq((parser1, parser2))
}
pub fn seq<T, I, O, E>(sequence: T) -> impl Parser<I, O, E>
where
I: ParserInput,
T: Sequence<I, O, E>,
{
let rep = sequence.representation();
let p = move |input| sequence.parse(input);
(p, rep)
}
/* TODO - eventually rewrite this parser combinator in Schala. Seeing what this
* code that makes heavy use of type variables and abstraction over types looks like
* in Schala's type system should be educational
*/
pub trait Sequence<I, O, E> {
fn parse(&self, input: I) -> ParseResult<I, O, E>;
fn representation(&self) -> Representation;
}
impl<I, O1, O2, E, P1, P2> Sequence<I, (O1, O2), E> for (P1, P2)
where
I: ParserInput,
P1: Parser<I, O1, E>,
P2: Parser<I, O2, E>,
{
fn parse(&self, input: I) -> ParseResult<I, (O1, O2), E> {
let parser1 = &self.0;
let parser2 = &self.1;
parser1.parse(input).and_then(|(result1, rest1)| {
parser2
.parse(rest1)
.map(|(result2, rest2)| ((result1, result2), rest2))
})
}
fn representation(&self) -> Representation {
let mut iter = [self.0.representation(), self.1.representation()].into_iter();
Representation::from_sequence(&mut iter)
}
}
impl<I, O1, O2, O3, E, P1, P2, P3> Sequence<I, (O1, O2, O3), E> for (P1, P2, P3)
where
I: ParserInput,
P1: Parser<I, O1, E>,
P2: Parser<I, O2, E>,
P3: Parser<I, O3, E>,
{
fn parse(&self, input: I) -> ParseResult<I, (O1, O2, O3), E> {
let parser1 = &self.0;
let parser2 = &self.1;
let parser3 = &self.2;
let (result1, rest1) = parser1.parse(input)?;
let (result2, rest2) = parser2.parse(rest1)?;
let (result3, rest3) = parser3.parse(rest2)?;
Ok(((result1, result2, result3), rest3))
}
fn representation(&self) -> Representation {
let mut iter = [
self.0.representation(),
self.1.representation(),
self.2.representation(),
]
.into_iter();
Representation::from_sequence(&mut iter)
}
}
impl<I, O1, O2, O3, O4, E, P1, P2, P3, P4> Sequence<I, (O1, O2, O3, O4), E> for (P1, P2, P3, P4)
where
I: ParserInput,
P1: Parser<I, O1, E>,
P2: Parser<I, O2, E>,
P3: Parser<I, O3, E>,
P4: Parser<I, O4, E>,
{
fn parse(&self, input: I) -> ParseResult<I, (O1, O2, O3, O4), E> {
let parser1 = &self.0;
let parser2 = &self.1;
let parser3 = &self.2;
let parser4 = &self.3;
let (result1, rest1) = parser1.parse(input)?;
let (result2, rest2) = parser2.parse(rest1)?;
let (result3, rest3) = parser3.parse(rest2)?;
let (result4, rest4) = parser4.parse(rest3)?;
Ok(((result1, result2, result3, result4), rest4))
}
fn representation(&self) -> Representation {
let mut iter = [
self.0.representation(),
self.1.representation(),
self.2.representation(),
self.3.representation(),
]
.into_iter();
Representation::from_sequence(&mut iter)
}
}
impl<I, O1, O2, O3, O4, O5, E, P1, P2, P3, P4, P5> Sequence<I, (O1, O2, O3, O4, O5), E>
for (P1, P2, P3, P4, P5)
where
I: ParserInput,
P1: Parser<I, O1, E>,
P2: Parser<I, O2, E>,
P3: Parser<I, O3, E>,
P4: Parser<I, O4, E>,
P5: Parser<I, O5, E>,
{
fn parse(&self, input: I) -> ParseResult<I, (O1, O2, O3, O4, O5), E> {
let parser1 = &self.0;
let parser2 = &self.1;
let parser3 = &self.2;
let parser4 = &self.3;
let parser5 = &self.4;
let (result1, rest1) = parser1.parse(input)?;
let (result2, rest2) = parser2.parse(rest1)?;
let (result3, rest3) = parser3.parse(rest2)?;
let (result4, rest4) = parser4.parse(rest3)?;
let (result5, rest5) = parser5.parse(rest4)?;
Ok(((result1, result2, result3, result4, result5), rest5))
}
fn representation(&self) -> Representation {
let mut iter = [
self.0.representation(),
self.1.representation(),
self.2.representation(),
self.3.representation(),
self.4.representation(),
]
.into_iter();
Representation::from_sequence(&mut iter)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::combinators::repeated;
use crate::primitives::{identifier, literal};
#[test]
fn test_tuple2() {
let p = tuple2(identifier, tuple2(literal(" "), literal("ruts")));
let (output, _rest) = p.parse("fort1 ruts").unwrap();
assert_eq!(output, ("fort1".into(), (" ", "ruts")));
let p = identifier.then(literal(" ")).then(literal("ruts"));
let (output, _rest) = p.parse("fort1 ruts").unwrap();
assert_eq!(output, (("fort1".into(), " "), "ruts"));
}
#[test]
fn test_seq() {
let p = seq((
literal("bong").to(10),
repeated(literal(" ")).to(()),
literal("hits").to(20),
));
assert_eq!(p.parse("bong hits").unwrap(), ((10, (), 20), ""));
let p = seq((
literal("alpha").to(10),
repeated(literal(" ")).to(()),
repeated(literal("-")).to(()),
repeated(literal(" ")),
literal("beta"),
));
assert_eq!(
p.parse("alpha ------ beta gamma").unwrap(),
((10, (), (), vec![" ", " ", " "], "beta"), " gamma")
);
}
}

135
src/testutil/mod.rs Normal file
View File

@ -0,0 +1,135 @@
use crate::choice::choice;
use crate::combinators::repeated;
use crate::primitives::{any_char, literal, literal_char, one_of, pred};
use crate::sequence::seq;
use crate::Parser;
/*
* JSON BNF
* <JSON> ::= <value>
<value> ::= <object> | <array> | <boolean> | <string> | <number> | <null>
<array> ::= "[" [<value>] {"," <value>}* "]"
<object> ::= "{" [<property>] {"," <property>}* "}"
<property> ::= <string> ":" <value>
*/
#[derive(Debug, Clone, PartialEq)]
pub enum JsonValue {
Null,
Bool(bool),
Str(String),
Num(f64),
Array(Vec<JsonValue>),
Object(Vec<(String, JsonValue)>),
}
pub trait JsonParser<'a, T>: Parser<&'a str, T, &'a str> {}
impl<'a, T, P> JsonParser<'a, T> for P where P: Parser<&'a str, T, &'a str> {}
pub fn json_null<'a>() -> impl JsonParser<'a, JsonValue> {
literal("null").to(JsonValue::Null)
}
pub fn json_bool<'a>() -> impl JsonParser<'a, JsonValue> {
choice((
literal("true").to(JsonValue::Bool(true)),
literal("false").to(JsonValue::Bool(false)),
))
}
pub fn json_number<'a>() -> impl JsonParser<'a, JsonValue> {
fn digit<'a>() -> impl JsonParser<'a, &'a str> {
one_of("1234567890")
}
fn digits<'a>() -> impl JsonParser<'a, Vec<&'a str>> {
repeated(digit()).at_least(1)
}
let json_number_inner = choice((
seq((digits(), literal(".").ignore_then(digits()).optional())).map(
|(mut digits, maybe_decimal)| {
if let Some(decimal_digits) = maybe_decimal {
digits.push(".");
digits.extend(decimal_digits.into_iter());
}
digits.into_iter().collect::<String>()
},
),
literal(".").ignore_then(digits()).map(|decimal_digits| {
let mut d = vec!["."];
d.extend(decimal_digits.into_iter());
d.into_iter().collect::<String>()
}),
))
.map(|digits| digits.parse::<f64>().unwrap());
literal("-")
.optional()
.then(json_number_inner)
.map(|(maybe_sign, mut val)| {
if maybe_sign.is_some() {
val *= -1.0;
}
JsonValue::Num(val)
})
}
pub fn json_string_raw<'a>() -> impl JsonParser<'a, String> {
seq((
literal_char('"'),
repeated(pred(any_char, |ch| *ch != '"')),
literal_char('"'),
))
.map(|(_, s, _)| s.iter().cloned().collect::<String>())
}
pub fn json_string<'a>() -> impl JsonParser<'a, JsonValue> {
json_string_raw().map(JsonValue::Str)
}
fn whitespace<'a>() -> impl JsonParser<'a, ()> {
repeated(choice((
literal_char('\t'),
literal_char('\n'),
literal_char(' '),
)))
.to(())
}
pub fn json_array<'a>() -> impl JsonParser<'a, JsonValue> {
move |input| {
let val = json_value().surrounded_by(whitespace());
repeated(val)
.separated_by(literal(","), false)
.delimited(literal_char('['), literal_char(']'))
.map(JsonValue::Array)
.parse(input)
}
}
pub fn json_object<'a>() -> impl JsonParser<'a, JsonValue> {
move |input| {
let kv = json_string_raw()
.surrounded_by(whitespace())
.then_ignore(literal_char(':'))
.then(json_value().surrounded_by(whitespace()));
repeated(kv)
.separated_by(literal_char(','), false)
.delimited(literal_char('{'), literal_char('}'))
.map(JsonValue::Object)
.parse(input)
}
}
pub fn json_value<'a>() -> impl JsonParser<'a, JsonValue> {
choice((
json_null(),
json_bool(),
json_number(),
json_string(),
json_array(),
json_object(),
))
}

49
tests/joplin-cfg.json Normal file
View File

@ -0,0 +1,49 @@
{
"$schema": "https://joplinapp.org/schema/settings.json",
"locale": "en_GB",
"sync.target": 6,
"markdown.plugin.softbreaks": false,
"markdown.plugin.typographer": false,
"spellChecker.language": "en-US",
"ui.layout": {
"key": "root",
"children": [
{
"key": "sideBar",
"width": 250,
"visible": true
},
{
"key": "noteList",
"width": 250,
"visible": true
},
{
"key": "editor",
"visible": true,
"width": 1493
},
{
"key": "plugin-view-joplin.plugin.note.tabs-note.tabs.panel",
"context": {
"pluginId": "joplin.plugin.note.tabs"
},
"visible": true
}
],
"visible": true
},
"noteVisiblePanes": [
"editor",
"viewer"
],
"theme": 4,
"sync.6.username": "webdav",
"net.ignoreTlsErrors": true,
"style.editor.contentMaxWidth": 600,
"editor.codeView": true,
"markdown.plugin.sub": true,
"markdown.plugin.sup": true,
"markdown.plugin.multitable": true
}

117
tests/json_parser.rs Normal file
View File

@ -0,0 +1,117 @@
use parser_combinator::primitives::literal;
use parser_combinator::testutil::{
json_array, json_bool, json_null, json_number, json_object, json_string, JsonValue,
};
use parser_combinator::{Parser, Representation};
use proptest::prelude::*;
use rstest::*;
proptest! {
#[test]
fn doesnt_crash(s in "\\PC*") {
let _output = json_object().parse(&s);
}
#[test]
fn parse_string(s in r#"[^"]+"#) {
let input = format!("\"{}\"", s);
let output = json_string().parse(&input).unwrap();
match output {
(JsonValue::Str(output_s), "") if output_s == s => (),
_ => panic!(),
}
}
}
#[test]
fn test_parsing() {
let output = literal("a").parse("a yolo");
assert_eq!(output.unwrap(), ("a", " yolo"));
}
#[test]
fn parse_json_primitives() {
assert_eq!(
json_string().parse(r#""yolo swagg""#).unwrap(),
(JsonValue::Str("yolo swagg".into()), "")
);
assert_eq!(
json_number().parse("-383").unwrap().0,
JsonValue::Num(-383f64)
);
assert_eq!(
json_number().parse("-.383").unwrap().0,
JsonValue::Num(-0.383)
);
assert_eq!(
json_number().parse(".383").unwrap().0,
JsonValue::Num(0.383)
);
assert_eq!(
json_number().parse("-1.383").unwrap().0,
JsonValue::Num(-1.383)
);
}
#[rstest]
#[case(r#"[ 4, 9, "ara",]"#)]
fn parse_json_array_err(#[case] input: &str) {
assert!(json_array().parse(input).is_err());
}
#[rstest]
#[case("[[],[]]", (JsonValue::Array(vec![JsonValue::Array(vec![]), JsonValue::Array(vec![])]), ""))]
#[case(r#"[ 4, 9, "foo" ]"#, (
JsonValue::Array(vec![
JsonValue::Num(4.),
JsonValue::Num(9.0),
JsonValue::Str("foo".to_string())
]),
""
))]
#[case(r#"[8,null,[],5],{}"#,
(
JsonValue::Array(vec![
JsonValue::Num(8.),
JsonValue::Null,
JsonValue::Array(vec![]),
JsonValue::Num(5.),
]),
",{}"
))]
fn parse_json_array(#[case] input: &str, #[case] expected: (JsonValue, &str)) {
assert_eq!(json_array().parse(input).unwrap(), expected);
}
#[test]
fn parse_json_object() {
assert_eq!(
json_object().parse(r#"{ "a": 23}"#).unwrap().0,
JsonValue::Object(vec![("a".into(), JsonValue::Num(23.))])
);
assert_eq!(
json_object().parse(r#"{}"#).unwrap().0,
JsonValue::Object(vec![])
);
}
#[test]
fn parse_json_document() {
let test_json = include_str!("joplin-cfg.json");
let parsed_json = json_object().parse(test_json);
assert!(parsed_json.is_ok());
}
#[rstest]
#[case(json_null().representation(), Representation::new("null"))]
#[case(json_bool().representation(), Representation::new("true | false"))]
#[case(json_number().representation(), Representation::new("- | ε (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | )+ . (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | )+ | ε | . (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | )+"))]
fn representations_test(
#[case] parser_representation: Representation,
#[case] expected: Representation,
) {
assert_eq!(parser_representation, expected);
}