Compare commits

..

10 Commits

Author SHA1 Message Date
Greg Shuflin
ffa282e89d Add readme 2023-02-03 16:12:55 -08:00
Greg Shuflin
2fd8b1be16 Representation work 2023-01-22 14:26:54 -08:00
Greg Shuflin
864246c24a Rename test 2023-01-22 14:26:54 -08:00
Greg Shuflin
172275a5f6 add parse_string test 2022-12-05 00:46:53 -08:00
Greg Shuflin
daab699f1f Proptest: doesn't crash 2022-12-04 00:22:33 -08:00
Greg Shuflin
50ca820cfe Add proptest 2022-12-04 00:16:14 -08:00
Greg Shuflin
d735bbab3d Add arbitrary crate
cf. https://www.greyblake.com/blog/property-based-testing-in-rust-with-arbitrary/
2022-11-11 16:41:53 -08:00
Greg Shuflin
6ac19c8989 Make parser input more complex 2022-10-23 00:41:51 -07:00
Greg Shuflin
f0e1d2b045 Clippy lints 2022-10-23 00:35:24 -07:00
Greg Shuflin
3f86c08dc1 Move json into integration test 2022-10-23 00:31:59 -07:00
13 changed files with 327 additions and 235 deletions

View File

@ -6,3 +6,5 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
arbitrary = "1.2.0"
proptest = "1.0.0"

5
README.md Normal file
View File

@ -0,0 +1,5 @@
# Rust Parser Combinator
This is a super-basic Rust parser combinator library I wrote mostly
as an exercise for myself. It is inspired by [nom](https://github.com/rust-bakery/nom)
and [chumsky](https://github.com/zesterer/chumsky).

View File

@ -1,5 +1,5 @@
use crate::combinators::separated_by::SeparatedBy; use crate::combinators::separated_by::SeparatedBy;
use crate::parser::{BoxedParser, ParseResult, Parser, ParserInput}; use crate::parser::{BoxedParser, ParseResult, Parser, ParserInput, Representation};
pub fn repeated<'a, P, I, O>(parser: P) -> Repeated<'a, I, O> pub fn repeated<'a, P, I, O>(parser: P) -> Repeated<'a, I, O>
where where
@ -83,4 +83,8 @@ where
Ok((results, further_input)) Ok((results, further_input))
} }
/// Placeholder: returns the fixed marker text "NOT IMPL'D" instead of a
/// description specific to this parser.
fn representation(&self) -> Representation {
Representation::new("NOT IMPL'D")
}
} }

View File

@ -1,5 +1,5 @@
use crate::combinators::repeated::Repeated; use crate::combinators::repeated::Repeated;
use crate::parser::{BoxedParser, ParseResult, Parser, ParserInput}; use crate::parser::{BoxedParser, ParseResult, Parser, ParserInput, Representation};
pub struct SeparatedBy<'a, I, O> pub struct SeparatedBy<'a, I, O>
where where
@ -14,6 +14,10 @@ impl<'a, I, O> Parser<I, Vec<O>, I> for SeparatedBy<'a, I, O>
where where
I: ParserInput + Clone + 'a, I: ParserInput + Clone + 'a,
{ {
/// Placeholder: returns the fixed marker text "NOT IMPL'D" instead of a
/// description specific to this parser.
fn representation(&self) -> Representation {
Representation::new("NOT IMPL'D")
}
fn parse(&self, input: I) -> ParseResult<I, Vec<O>, I> { fn parse(&self, input: I) -> ParseResult<I, Vec<O>, I> {
let at_least = self.inner_repeated.at_least.unwrap_or(0); let at_least = self.inner_repeated.at_least.unwrap_or(0);
let at_most = self.inner_repeated.at_most.unwrap_or(u16::MAX); let at_most = self.inner_repeated.at_most.unwrap_or(u16::MAX);

View File

@ -4,221 +4,4 @@ mod parser;
pub mod primitives; pub mod primitives;
pub mod sequence; pub mod sequence;
#[cfg(test)]
pub use parser::{ParseResult, Parser, ParserInput}; pub use parser::{ParseResult, Parser, ParserInput};
// Unit tests for the crate: a small JSON parser is assembled from the
// library's combinators and then run against sample inputs.
#[cfg(test)]
mod tests {
use super::*;
use crate::choice::choice;
use crate::combinators::repeated;
use crate::primitives::{any_char, literal, literal_char, one_of, pred};
use crate::sequence::seq;
// Smoke test: `literal` consumes the expected prefix and returns the rest.
#[test]
fn test_parsing() {
let output = literal("a")("a yolo");
assert_eq!(output.unwrap(), ("a", " yolo"));
}
/*
* JSON BNF
* <JSON> ::= <value>
<value> ::= <object> | <array> | <boolean> | <string> | <number> | <null>
<array> ::= "[" [<value>] {"," <value>}* "]"
<object> ::= "{" [<property>] {"," <property>}* "}"
<property> ::= <string> ":" <value>
*/
// Value tree produced by the test JSON parser below.
#[derive(Debug, Clone, PartialEq)]
enum JsonValue {
Null,
Bool(bool),
Str(String),
Num(f64),
Array(Vec<JsonValue>),
// Members are kept as a list of (key, value) pairs.
Object(Vec<(String, JsonValue)>),
}
// Shorthand for "a parser over `&str` whose error type is also `&str`";
// the blanket impl makes every such parser a `JsonParser`.
trait JsonParser<'a, T>: Parser<&'a str, T, &'a str> {}
impl<'a, T, P> JsonParser<'a, T> for P where P: Parser<&'a str, T, &'a str> {}
// Matches the literal `null`.
fn json_null<'a>() -> impl JsonParser<'a, JsonValue> {
literal("null").to(JsonValue::Null)
}
// Matches the literals `true` or `false`.
fn json_bool<'a>() -> impl JsonParser<'a, JsonValue> {
choice((
literal("true").to(JsonValue::Bool(true)),
literal("false").to(JsonValue::Bool(false)),
))
}
// Matches an optionally negative decimal number; both `1.5` and `.5` forms
// are accepted. The matched digits are re-joined into a string and parsed
// as `f64`.
fn json_number() -> impl JsonParser<'static, JsonValue> {
let digit = || one_of("1234567890");
let digits = || repeated(digit()).at_least(1);
let json_number_inner = choice((
seq((digits(), literal(".").ignore_then(digits()).optional())).map(
|(mut digits, maybe_decimal)| {
if let Some(decimal_digits) = maybe_decimal {
digits.push(".");
digits.extend(decimal_digits.into_iter());
}
digits.into_iter().collect::<String>()
},
),
literal(".").ignore_then(digits()).map(|decimal_digits| {
let mut d = vec!["."];
d.extend(decimal_digits.into_iter());
d.into_iter().collect::<String>()
}),
))
.map(|digits| digits.parse::<f64>().unwrap());
literal("-")
.optional()
.then(json_number_inner)
.map(|(maybe_sign, mut val)| {
if maybe_sign.is_some() {
val *= -1.0;
}
JsonValue::Num(val)
})
}
// Matches a double-quoted string and yields its contents; every character
// other than `"` is accepted between the quotes (no escape handling).
fn json_string_raw() -> impl JsonParser<'static, String> {
seq((
literal_char('"'),
repeated(pred(any_char, |ch| *ch != '"')),
literal_char('"'),
))
.map(|(_, s, _)| s.iter().cloned().collect::<String>())
}
// Wraps `json_string_raw` output in `JsonValue::Str`.
fn json_string() -> impl JsonParser<'static, JsonValue> {
json_string_raw().map(JsonValue::Str)
}
// Matches any run of tab, newline and space characters and discards it.
fn whitespace() -> impl JsonParser<'static, ()> {
repeated(choice((
literal_char('\t'),
literal_char('\n'),
literal_char(' '),
)))
.to(())
}
// Matches `[` value {"," value} `]`. The parser is built inside a closure,
// i.e. on each parse call, because `json_value` refers back to `json_array`.
fn json_array() -> impl JsonParser<'static, JsonValue> {
move |input| {
let val = json_value().surrounded_by(whitespace());
repeated(val)
.separated_by(literal(","), false)
.delimited(literal_char('['), literal_char(']'))
.map(JsonValue::Array)
.parse(input)
}
}
// Matches `{` string ":" value {"," ...} `}`; built inside a closure for the
// same reason as `json_array`.
fn json_object() -> impl JsonParser<'static, JsonValue> {
move |input| {
let kv = json_string_raw()
.surrounded_by(whitespace())
.then_ignore(literal_char(':'))
.then(json_value().surrounded_by(whitespace()));
repeated(kv)
.separated_by(literal_char(','), false)
.delimited(literal_char('{'), literal_char('}'))
.map(JsonValue::Object)
.parse(input)
}
}
// Any JSON value: the alternatives are passed to `choice` in this order.
fn json_value() -> impl JsonParser<'static, JsonValue> {
choice((
json_null(),
json_bool(),
json_number(),
json_string(),
json_array(),
json_object(),
))
}
#[test]
fn parse_json_primitives() {
assert_eq!(
json_string().parse(r#""yolo swagg""#).unwrap(),
(JsonValue::Str("yolo swagg".into()), "")
);
assert_eq!(
json_number().parse("-383").unwrap().0,
JsonValue::Num(-383f64)
);
assert_eq!(
json_number().parse("-.383").unwrap().0,
JsonValue::Num(-0.383)
);
assert_eq!(
json_number().parse(".383").unwrap().0,
JsonValue::Num(0.383)
);
assert_eq!(
json_number().parse("-1.383").unwrap().0,
JsonValue::Num(-1.383)
);
}
#[test]
fn parse_json_array() {
// A trailing comma before `]` must be rejected.
assert!(json_array().parse(r#"[ 4, 9, "ara",]"#).is_err());
assert_eq!(
json_array().parse("[[],[]]").unwrap().0,
JsonValue::Array(vec![JsonValue::Array(vec![]), JsonValue::Array(vec![])])
);
assert_eq!(
json_array().parse(r#"[ 4, 9, "foo" ]"#).unwrap(),
(
JsonValue::Array(vec![
JsonValue::Num(4.),
JsonValue::Num(9.0),
JsonValue::Str("foo".to_string())
]),
""
)
);
// Input after the closing `]` is returned unconsumed.
assert_eq!(
json_array().parse(r#"[8,null,[],5],{}"#).unwrap(),
(
JsonValue::Array(vec![
JsonValue::Num(8.),
JsonValue::Null,
JsonValue::Array(vec![]),
JsonValue::Num(5.),
]),
",{}"
)
);
assert_eq!(json_value().parse("true"), Ok((JsonValue::Bool(true), "")));
}
#[test]
fn parse_json_object() {
assert_eq!(
json_object().parse(r#"{ "a": 23}"#).unwrap().0,
JsonValue::Object(vec![("a".into(), JsonValue::Num(23.))])
);
assert_eq!(
json_object().parse(r#"{}"#).unwrap().0,
JsonValue::Object(vec![])
);
}
// Parses a JSON fixture file that is embedded at compile time.
#[test]
fn parse_json() {
let test_json = include_str!("joplin-cfg.json");
let parsed_json = json_object().parse(test_json);
assert!(parsed_json.is_ok());
}
}

View File

@ -1,4 +1,4 @@
use crate::parser::{ParseResult, Parser, ParserInput}; use crate::parser::{ParseResult, Parser, ParserInput, Representation};
pub struct BoxedParser<'a, I, O, E> pub struct BoxedParser<'a, I, O, E>
where where
@ -22,6 +22,9 @@ where
} }
impl<'a, I: ParserInput, O, E> Parser<I, O, E> for BoxedParser<'a, I, O, E> { impl<'a, I: ParserInput, O, E> Parser<I, O, E> for BoxedParser<'a, I, O, E> {
/// Placeholder: returns the fixed marker text "NOT IMPL'D"; the wrapped
/// parser's own representation is not consulted.
fn representation(&self) -> Representation {
Representation::new("NOT IMPL'D")
}
fn parse(&self, input: I) -> ParseResult<I, O, E> { fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.inner.parse(input) self.inner.parse(input)
} }

View File

@ -1,24 +1,25 @@
mod boxed_parser; mod boxed_parser;
mod named_parser; mod named_parser;
mod parser_input;
mod representation;
use std::rc::Rc; use std::rc::Rc;
pub use boxed_parser::BoxedParser; pub use boxed_parser::BoxedParser;
pub use named_parser::NamedParser; pub use named_parser::NamedParser;
pub use parser_input::ParserInput;
pub use representation::Representation;
pub type ParseResult<I, O, E> = Result<(O, I), E>; pub type ParseResult<I, O, E> = Result<(O, I), E>;
pub trait ParserInput: std::fmt::Debug {}
impl ParserInput for &str {}
impl ParserInput for String {}
pub trait Parser<I, O, E> pub trait Parser<I, O, E>
where where
I: ParserInput, I: ParserInput,
{ {
fn parse(&self, input: I) -> ParseResult<I, O, E>; fn parse(&self, input: I) -> ParseResult<I, O, E>;
fn representation(&self) -> Representation;
fn boxed<'a>(self) -> BoxedParser<'a, I, O, E> fn boxed<'a>(self) -> BoxedParser<'a, I, O, E>
where where
Self: Sized + 'a, Self: Sized + 'a,
@ -144,6 +145,23 @@ where
fn parse(&self, input: I) -> ParseResult<I, O, E> { fn parse(&self, input: I) -> ParseResult<I, O, E> {
self(input) self(input)
} }
/// Placeholder: returns the fixed marker text "NOT IMPL'D" instead of a
/// description specific to this parser.
fn representation(&self) -> Representation {
Representation::new("NOT IMPL'D")
}
}
impl<I: ParserInput, O, E, F> Parser<I, O, E> for (F, Representation)
where
F: Fn(I) -> ParseResult<I, O, E>,
{
/// Runs the parsing function stored in tuple field 0 on `input` and returns
/// its result unchanged.
fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.0(input)
}
/// Returns a clone of the `Representation` stored in tuple field 1.
fn representation(&self) -> Representation {
self.1.clone()
}
} }
impl<I, O, E, T> Parser<I, O, E> for Rc<T> impl<I, O, E, T> Parser<I, O, E> for Rc<T>
@ -154,4 +172,8 @@ where
fn parse(&self, input: I) -> ParseResult<I, O, E> { fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.as_ref().parse(input) self.as_ref().parse(input)
} }
/// Forwards to the `representation` of the value behind the pointer.
fn representation(&self) -> Representation {
self.as_ref().representation()
}
} }

View File

@ -1,5 +1,5 @@
use crate::parser::{ParseResult, Parser, ParserInput};
use super::boxed_parser::BoxedParser; use super::boxed_parser::BoxedParser;
use crate::parser::{ParseResult, Parser, ParserInput, Representation};
pub struct NamedParser<'a, I, O, E> pub struct NamedParser<'a, I, O, E>
where where
@ -26,6 +26,10 @@ where
} }
impl<'a, I: ParserInput, O, E> Parser<I, O, E> for NamedParser<'a, I, O, E> { impl<'a, I: ParserInput, O, E> Parser<I, O, E> for NamedParser<'a, I, O, E> {
/// Placeholder: returns the fixed marker text "NOT IMPL'D"; the inner
/// parser's own representation is not consulted.
fn representation(&self) -> Representation {
Representation::new("NOT IMPL'D")
}
fn parse(&self, input: I) -> ParseResult<I, O, E> { fn parse(&self, input: I) -> ParseResult<I, O, E> {
self.inner_parser.parse(input) self.inner_parser.parse(input)
} }

View File

@ -0,0 +1,11 @@
/// Trait for types that can be handed to a parser as its input.
///
/// Implementors must be `Debug`.
pub trait ParserInput: std::fmt::Debug {
    /// Type of the value produced by [`ParserInput::next_token`].
    type Output;

    /// Produces a token value.
    ///
    /// NOTE(review): this is an associated function with no receiver, so it
    /// cannot inspect any particular input value — looks like a
    /// work-in-progress API; confirm the intended signature.
    fn next_token() -> Self::Output;
}

impl ParserInput for &str {
    type Output = ();

    /// String slices have no token type yet, so this yields the unit value.
    fn next_token() -> Self::Output {}
}

View File

@ -0,0 +1,12 @@
/// Textual representation value; currently a thin wrapper around an owned
/// string.
#[derive(Clone, Debug)]
pub struct Representation {
    /// The text supplied at construction time.
    val: String,
}

impl Representation {
    /// Creates a representation holding an owned copy of `from`.
    pub fn new(from: &str) -> Self {
        let val = String::from(from);
        Self { val }
    }
}

View File

@ -1,4 +1,4 @@
use crate::parser::{ParseResult, Parser, ParserInput}; use crate::parser::{ParseResult, Parser, ParserInput, Representation};
pub fn literal_char(expected: char) -> impl Fn(&str) -> ParseResult<&str, char, &str> { pub fn literal_char(expected: char) -> impl Fn(&str) -> ParseResult<&str, char, &str> {
move |input| match input.chars().next() { move |input| match input.chars().next() {
@ -7,11 +7,13 @@ pub fn literal_char(expected: char) -> impl Fn(&str) -> ParseResult<&str, char,
} }
} }
pub fn literal(expected: &'static str) -> impl Fn(&str) -> ParseResult<&str, &str, &str> { pub fn literal<'a>(expected: &'static str) -> impl Parser<&'a str, &'a str, &'a str> {
move |input| match input.get(0..expected.len()) { println!("literal call expected: {}", expected);
let p = move |input: &'a str| match input.get(0..expected.len()) {
Some(next) if next == expected => Ok((expected, &input[expected.len()..])), Some(next) if next == expected => Ok((expected, &input[expected.len()..])),
_ => Err(input), _ => Err(input),
} };
(p, Representation::new("yolo"))
} }
pub fn any_char(input: &str) -> ParseResult<&str, char, &str> { pub fn any_char(input: &str) -> ParseResult<&str, char, &str> {
@ -22,15 +24,23 @@ pub fn any_char(input: &str) -> ParseResult<&str, char, &str> {
} }
pub fn one_of<'a>(items: &'static str) -> impl Parser<&'a str, &'a str, &'a str> { pub fn one_of<'a>(items: &'static str) -> impl Parser<&'a str, &'a str, &'a str> {
move |input: &'a str| { let p = move |input: &'a str| {
if let Some(ch) = input.chars().nth(0) { if let Some(ch) = input.chars().next() {
if items.contains(ch) { if items.contains(ch) {
let (first, rest) = input.split_at(1); let (first, rest) = input.split_at(1);
return Ok((first, rest)); return Ok((first, rest));
} }
} }
Err(input) Err(input)
};
let mut s = String::new();
for ch in items.chars() {
s.push(ch);
s.push_str(" | ");
} }
let rep = Representation::new(&s);
(p, rep)
} }
pub fn pred<P, F, I, O>(parser: P, pred_fn: F) -> impl Parser<I, O, I> pub fn pred<P, F, I, O>(parser: P, pred_fn: F) -> impl Parser<I, O, I>
@ -39,7 +49,8 @@ where
P: Parser<I, O, I>, P: Parser<I, O, I>,
F: Fn(&O) -> bool, F: Fn(&O) -> bool,
{ {
move |input| { let orig_rep = parser.representation();
(move |input| {
parser.parse(input).and_then(|(result, rest)| { parser.parse(input).and_then(|(result, rest)| {
if pred_fn(&result) { if pred_fn(&result) {
Ok((result, rest)) Ok((result, rest))
@ -47,7 +58,7 @@ where
Err(rest) Err(rest)
} }
}) })
} }, Representation::new(&format!("{:?} if <PREDICATE>", orig_rep)))
} }
/// Parses a standard identifier in a programming language /// Parses a standard identifier in a programming language
@ -60,7 +71,7 @@ pub fn identifier(input: &str) -> ParseResult<&str, String, &str> {
_ => return Err(input), _ => return Err(input),
} }
while let Some(next) = chars.next() { for next in chars {
if next.is_alphanumeric() { if next.is_alphanumeric() {
buf.push(next); buf.push(next);
} else { } else {

231
tests/json_parser.rs Normal file
View File

@ -0,0 +1,231 @@
use parser_combinator::choice::choice;
use parser_combinator::combinators::repeated;
use parser_combinator::primitives::{any_char, literal, literal_char, one_of, pred};
use parser_combinator::sequence::seq;
use parser_combinator::Parser;
use proptest::prelude::*;
proptest! {
    /// Feeding arbitrary generated text to the object parser must not panic;
    /// the parse result itself is irrelevant.
    #[test]
    fn doesnt_crash(s in "\\PC*") {
        let _result = json_object().parse(&s);
    }

    /// Any non-empty text without a double quote must round-trip through
    /// `json_string` once it is wrapped in quotes, leaving no input behind.
    #[test]
    fn parse_string(s in r#"[^"]+"#) {
        let quoted = format!("\"{}\"", s);
        let parsed = json_string().parse(&quoted).unwrap();
        let round_trips = matches!(parsed, (JsonValue::Str(ref text), "") if *text == s);
        if !round_trips {
            panic!();
        }
    }
}
/// Smoke test: `literal` consumes the expected prefix and hands back the rest.
#[test]
fn test_parsing() {
    let (matched, rest) = literal("a").parse("a yolo").unwrap();
    assert_eq!((matched, rest), ("a", " yolo"));
}
/*
* JSON BNF
* <JSON> ::= <value>
<value> ::= <object> | <array> | <boolean> | <string> | <number> | <null>
<array> ::= "[" [<value>] {"," <value>}* "]"
<object> ::= "{" [<property>] {"," <property>}* "}"
<property> ::= <string> ":" <value>
*/
/// Value tree produced by the JSON parser in this test file.
#[derive(Clone, Debug, PartialEq)]
enum JsonValue {
    /// The `null` literal.
    Null,
    /// The `true` / `false` literals.
    Bool(bool),
    /// A string value.
    Str(String),
    /// A number, stored as `f64`.
    Num(f64),
    /// An array of values.
    Array(Vec<JsonValue>),
    /// An object, stored as a list of (key, value) pairs.
    Object(Vec<(String, JsonValue)>),
}
/// Shorthand for a parser over `&'a str` whose error type is also `&'a str`.
trait JsonParser<'a, T>: Parser<&'a str, T, &'a str> {}

/// Every parser with that shape is automatically a `JsonParser`.
impl<'a, T, P: Parser<&'a str, T, &'a str>> JsonParser<'a, T> for P {}
/// Matches the literal `null` and yields `JsonValue::Null`.
fn json_null<'a>() -> impl JsonParser<'a, JsonValue> {
    let null_keyword = literal("null");
    null_keyword.to(JsonValue::Null)
}
/// Matches the literal `true` or `false` and yields the matching
/// `JsonValue::Bool`.
fn json_bool<'a>() -> impl JsonParser<'a, JsonValue> {
    let truthy = literal("true").to(JsonValue::Bool(true));
    let falsy = literal("false").to(JsonValue::Bool(false));
    choice((truthy, falsy))
}
/// Matches an optionally negative decimal number.
///
/// Two shapes are accepted: digits with an optional `.digits` suffix, or a
/// bare `.digits`. The matched pieces are joined back into text and parsed
/// as `f64`.
fn json_number<'a>() -> impl JsonParser<'a, JsonValue> {
    let single_digit = || one_of("1234567890");
    let digit_run = || repeated(single_digit()).at_least(1);

    // `123` or `123.45`
    let with_integer_part = seq((
        digit_run(),
        literal(".").ignore_then(digit_run()).optional(),
    ))
    .map(|(mut pieces, fraction)| {
        match fraction {
            Some(fraction_digits) => {
                pieces.push(".");
                pieces.extend(fraction_digits);
            }
            None => {}
        }
        pieces.concat()
    });

    // `.45`
    let fraction_only = literal(".").ignore_then(digit_run()).map(|fraction_digits| {
        let mut pieces = vec!["."];
        pieces.extend(fraction_digits);
        pieces.concat()
    });

    let magnitude = choice((with_integer_part, fraction_only))
        .map(|text| text.parse::<f64>().unwrap());

    literal("-")
        .optional()
        .then(magnitude)
        .map(|(sign, magnitude)| {
            let signed = if sign.is_some() {
                magnitude * -1.0
            } else {
                magnitude
            };
            JsonValue::Num(signed)
        })
}
/// Matches a double-quoted string and yields its contents as a `String`.
///
/// Every character other than `"` is accepted between the quotes; escape
/// sequences are not interpreted.
fn json_string_raw<'a>() -> impl JsonParser<'a, String> {
    let opening = literal_char('"');
    let contents = repeated(pred(any_char, |ch| *ch != '"'));
    let closing = literal_char('"');
    seq((opening, contents, closing))
        .map(|(_, chars, _)| chars.iter().copied().collect::<String>())
}
/// Matches a double-quoted string and wraps its contents in `JsonValue::Str`.
fn json_string<'a>() -> impl JsonParser<'a, JsonValue> {
    let raw = json_string_raw();
    raw.map(JsonValue::Str)
}
/// Matches any run of tab, newline and space characters and discards it.
fn whitespace<'a>() -> impl JsonParser<'a, ()> {
    let blank = choice((
        literal_char('\t'),
        literal_char('\n'),
        literal_char(' '),
    ));
    repeated(blank).to(())
}
/// Matches a bracketed, comma-separated list of JSON values.
///
/// The parser is assembled inside the closure, i.e. on each parse call,
/// because `json_value` refers back to `json_array`.
fn json_array<'a>() -> impl JsonParser<'a, JsonValue> {
    move |input| {
        let element = json_value().surrounded_by(whitespace());
        let elements = repeated(element).separated_by(literal(","), false);
        let array = elements
            .delimited(literal_char('['), literal_char(']'))
            .map(JsonValue::Array);
        array.parse(input)
    }
}
/// Matches a brace-delimited, comma-separated list of `"key": value` pairs.
///
/// The parser is assembled inside the closure, i.e. on each parse call,
/// because `json_value` refers back to `json_object`.
fn json_object<'a>() -> impl JsonParser<'a, JsonValue> {
    move |input| {
        let key = json_string_raw().surrounded_by(whitespace());
        let value = json_value().surrounded_by(whitespace());
        let member = key.then_ignore(literal_char(':')).then(value);
        let members = repeated(member).separated_by(literal_char(','), false);
        let object = members
            .delimited(literal_char('{'), literal_char('}'))
            .map(JsonValue::Object);
        object.parse(input)
    }
}
/// Matches any JSON value; the alternatives are handed to `choice` in the
/// order null, bool, number, string, array, object.
fn json_value<'a>() -> impl JsonParser<'a, JsonValue> {
    let alternatives = (
        json_null(),
        json_bool(),
        json_number(),
        json_string(),
        json_array(),
        json_object(),
    );
    choice(alternatives)
}
/// Strings and the supported number shapes parse to the expected values.
#[test]
fn parse_json_primitives() {
    let parsed_string = json_string().parse(r#""yolo swagg""#).unwrap();
    assert_eq!(parsed_string, (JsonValue::Str("yolo swagg".into()), ""));

    // (input text, expected numeric value)
    let number_cases = [
        ("-383", -383f64),
        ("-.383", -0.383),
        (".383", 0.383),
        ("-1.383", -1.383),
    ];
    for (text, expected) in number_cases {
        let (value, _rest) = json_number().parse(text).unwrap();
        assert_eq!(value, JsonValue::Num(expected));
    }
}
/// Arrays: trailing commas are rejected, nesting works, and input after the
/// closing bracket is returned unconsumed.
#[test]
fn parse_json_array() {
    let trailing_comma = json_array().parse(r#"[ 4, 9, "ara",]"#);
    assert!(trailing_comma.is_err());

    let (nested, _rest) = json_array().parse("[[],[]]").unwrap();
    let expected_nested =
        JsonValue::Array(vec![JsonValue::Array(vec![]), JsonValue::Array(vec![])]);
    assert_eq!(nested, expected_nested);

    let mixed = json_array().parse(r#"[ 4, 9, "foo" ]"#).unwrap();
    let expected_mixed = JsonValue::Array(vec![
        JsonValue::Num(4.),
        JsonValue::Num(9.0),
        JsonValue::Str("foo".to_string()),
    ]);
    assert_eq!(mixed, (expected_mixed, ""));

    let with_leftover = json_array().parse(r#"[8,null,[],5],{}"#).unwrap();
    let expected_prefix = JsonValue::Array(vec![
        JsonValue::Num(8.),
        JsonValue::Null,
        JsonValue::Array(vec![]),
        JsonValue::Num(5.),
    ]);
    assert_eq!(with_leftover, (expected_prefix, ",{}"));

    assert_eq!(json_value().parse("true"), Ok((JsonValue::Bool(true), "")));
}
/// Objects: a single member and the empty object both parse.
#[test]
fn parse_json_object() {
    let (single_member, _rest) = json_object().parse(r#"{ "a": 23}"#).unwrap();
    assert_eq!(
        single_member,
        JsonValue::Object(vec![("a".into(), JsonValue::Num(23.))])
    );

    let (empty, _rest) = json_object().parse(r#"{}"#).unwrap();
    assert_eq!(empty, JsonValue::Object(vec![]));
}
/// The JSON fixture embedded at compile time parses successfully as an object.
#[test]
fn parse_json_document() {
    let document = include_str!("joplin-cfg.json");
    assert!(json_object().parse(document).is_ok());
}