Compare commits
38 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
575c915136 | ||
|
e47bcbe760 | ||
|
e6e1d14eee | ||
|
a189f34c37 | ||
|
9c2228dbff | ||
|
4818b23c3b | ||
|
0829b16fc9 | ||
|
9ed860383e | ||
|
be501f540e | ||
|
a55a806a60 | ||
|
698e05081a | ||
|
4e813a7efd | ||
|
b042e06084 | ||
|
5141cdadd9 | ||
|
2909cdf296 | ||
|
33899ae66e | ||
|
a9d08a9213 | ||
|
cdbbb8214f | ||
|
d8a68bcbf3 | ||
|
51d7380761 | ||
|
ea6a513572 | ||
|
05c9ada7c6 | ||
|
bb06350404 | ||
|
56042dbbe2 | ||
|
3669d5d2cc | ||
|
918e3d042b | ||
|
afae0d0840 | ||
|
2ad7707349 | ||
|
0e26ef1ea6 | ||
|
9efd9d78d1 | ||
|
e697b8ed21 | ||
|
477fc50b65 | ||
|
f2ff509748 | ||
|
bffaca4d68 | ||
|
97d35df687 | ||
|
41829019b6 | ||
|
cbb30d3e9f | ||
|
8f00b77c2c |
6
justfile
Normal file
6
justfile
Normal file
@ -0,0 +1,6 @@
|
||||
_default:
|
||||
just --list
|
||||
|
||||
|
||||
test *args:
|
||||
cargo nextest run {{args}}
|
55
src/annotated.rs
Normal file
55
src/annotated.rs
Normal file
@ -0,0 +1,55 @@
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::{representation::Representation, ParseResult, Parser};
|
||||
|
||||
pub struct AnnotatedParser<P, I, O, E>
|
||||
where
|
||||
P: Parser<I, O, E>,
|
||||
{
|
||||
inner: P,
|
||||
name: Option<String>,
|
||||
repr: Representation,
|
||||
phantom: PhantomData<(I, O, E)>,
|
||||
}
|
||||
|
||||
impl<P, I, O, E> Parser<I, O, E> for AnnotatedParser<P, I, O, E>
|
||||
where
|
||||
P: Parser<I, O, E>,
|
||||
{
|
||||
fn parse(&self, input: I) -> ParseResult<I, O, E> {
|
||||
self.inner.parse(input)
|
||||
}
|
||||
|
||||
fn name(&self) -> Option<String> {
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn representation(&self) -> Representation {
|
||||
self.repr.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl<P, I, O, E> AnnotatedParser<P, I, O, E>
|
||||
where
|
||||
P: Parser<I, O, E>,
|
||||
{
|
||||
pub fn new(inner: P) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
name: None,
|
||||
repr: Representation::new(),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_name(self, name: &str) -> Self {
|
||||
Self {
|
||||
name: Some(name.to_string()),
|
||||
..self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_repr(self, repr: Representation) -> Self {
|
||||
Self { repr, ..self }
|
||||
}
|
||||
}
|
74
src/choice.rs
Normal file
74
src/choice.rs
Normal file
@ -0,0 +1,74 @@
|
||||
use crate::{ParseResult, Parser};
|
||||
|
||||
pub trait Choice<I, O, E> {
|
||||
fn parse_choice(&self, input: I) -> Result<(O, I), (E, I)>;
|
||||
}
|
||||
|
||||
pub fn choice<C: Choice<I, O, E>, I, O, E>(choices: C) -> impl Parser<I, O, E> {
|
||||
move |input| choices.parse_choice(input)
|
||||
}
|
||||
|
||||
fn choice_loop<'a, I, O, E>(
|
||||
mut input: I,
|
||||
parsers: &'a [&'a dyn Parser<I, O, E>],
|
||||
) -> ParseResult<I, O, E> {
|
||||
//TODO need a more principled way to return an error when no choices work
|
||||
let mut err = None;
|
||||
|
||||
for parser in parsers.iter() {
|
||||
match parser.parse(input) {
|
||||
Ok(res) => return Ok(res),
|
||||
Err((e, rest)) => {
|
||||
err = Some(e);
|
||||
input = rest;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err((err.unwrap(), input))
|
||||
}
|
||||
|
||||
impl<P1, P2, I, O, E> Choice<I, O, E> for (P1, P2)
|
||||
where
|
||||
P1: Parser<I, O, E>,
|
||||
P2: Parser<I, O, E>,
|
||||
{
|
||||
fn parse_choice(&self, input: I) -> Result<(O, I), (E, I)> {
|
||||
choice_loop(input, &[&self.0, &self.1])
|
||||
}
|
||||
}
|
||||
|
||||
impl<P1, P2, P3, I, O, E> Choice<I, O, E> for (P1, P2, P3)
|
||||
where
|
||||
P1: Parser<I, O, E>,
|
||||
P2: Parser<I, O, E>,
|
||||
P3: Parser<I, O, E>,
|
||||
{
|
||||
fn parse_choice(&self, input: I) -> Result<(O, I), (E, I)> {
|
||||
choice_loop(input, &[&self.0, &self.1, &self.2])
|
||||
}
|
||||
}
|
||||
|
||||
impl<P1, P2, P3, P4, I, O, E> Choice<I, O, E> for (P1, P2, P3, P4)
|
||||
where
|
||||
P1: Parser<I, O, E>,
|
||||
P2: Parser<I, O, E>,
|
||||
P3: Parser<I, O, E>,
|
||||
P4: Parser<I, O, E>,
|
||||
{
|
||||
fn parse_choice(&self, input: I) -> Result<(O, I), (E, I)> {
|
||||
choice_loop(input, &[&self.0, &self.1, &self.2, &self.3])
|
||||
}
|
||||
}
|
||||
|
||||
impl<P1, P2, P3, P4, P5, I, O, E> Choice<I, O, E> for (P1, P2, P3, P4, P5)
|
||||
where
|
||||
P1: Parser<I, O, E>,
|
||||
P2: Parser<I, O, E>,
|
||||
P3: Parser<I, O, E>,
|
||||
P4: Parser<I, O, E>,
|
||||
P5: Parser<I, O, E>,
|
||||
{
|
||||
fn parse_choice(&self, input: I) -> Result<(O, I), (E, I)> {
|
||||
choice_loop(input, &[&self.0, &self.1, &self.2, &self.3, &self.4])
|
||||
}
|
||||
}
|
234
src/combinators.rs
Normal file
234
src/combinators.rs
Normal file
@ -0,0 +1,234 @@
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::{
|
||||
representation::{Representation, EBNF},
|
||||
ParseResult, Parser,
|
||||
};
|
||||
|
||||
pub fn repeated<P, I, O, E>(parser: P) -> Repeated<P, I, O, E>
|
||||
where
|
||||
P: Parser<I, O, E>,
|
||||
{
|
||||
Repeated::new(parser)
|
||||
}
|
||||
|
||||
pub struct Repeated<P, I, O, E>
|
||||
where
|
||||
P: Parser<I, O, E>,
|
||||
{
|
||||
inner_parser: P,
|
||||
phantom: PhantomData<(I, O, E)>,
|
||||
at_least: Option<u32>,
|
||||
at_most: Option<u32>,
|
||||
}
|
||||
|
||||
impl<P, I, O, E> Repeated<P, I, O, E>
|
||||
where
|
||||
P: Parser<I, O, E>,
|
||||
{
|
||||
fn new(inner_parser: P) -> Self {
|
||||
Self {
|
||||
inner_parser,
|
||||
phantom: PhantomData,
|
||||
at_least: None,
|
||||
at_most: None,
|
||||
}
|
||||
}
|
||||
pub fn at_least(self, at_least: u32) -> Self {
|
||||
Self {
|
||||
at_least: Some(at_least),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn at_most(self, at_most: u32) -> Self {
|
||||
Self {
|
||||
at_most: Some(at_most),
|
||||
..self
|
||||
}
|
||||
}
|
||||
pub fn separated_by<D>(self, delimiter: D) -> SeparatedBy<D, P, I, O, E>
|
||||
where
|
||||
D: Parser<I, (), E>,
|
||||
E: Default,
|
||||
{
|
||||
SeparatedBy {
|
||||
inner_repeated: self,
|
||||
delimiter,
|
||||
allow_trailing: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<P, I, O, E> Parser<I, Vec<O>, E> for Repeated<P, I, O, E>
|
||||
where
|
||||
P: Parser<I, O, E>,
|
||||
E: Default,
|
||||
{
|
||||
fn parse(&self, mut input: I) -> ParseResult<I, Vec<O>, E> {
|
||||
let at_least = self.at_least.unwrap_or(0);
|
||||
let at_most = self.at_most.unwrap_or(u32::MAX);
|
||||
|
||||
let mut results = vec![];
|
||||
let mut count = 0;
|
||||
|
||||
if at_most == 0 {
|
||||
return Ok((vec![], input));
|
||||
}
|
||||
|
||||
loop {
|
||||
match self.inner_parser.parse(input) {
|
||||
Ok((item, rest)) => {
|
||||
results.push(item);
|
||||
input = rest;
|
||||
count += 1;
|
||||
if count >= at_most {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err((_err, rest)) => {
|
||||
input = rest;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if count < at_least {
|
||||
return Err((Default::default(), input));
|
||||
}
|
||||
Ok((results, input))
|
||||
}
|
||||
|
||||
fn name(&self) -> Option<String> {
|
||||
self.inner_parser.name()
|
||||
}
|
||||
|
||||
fn representation(&self) -> Representation {
|
||||
let at_least = self.at_least.unwrap_or(0);
|
||||
//TODO flesh this out better
|
||||
let _at_most = self.at_most.unwrap_or(u32::MAX);
|
||||
let production = EBNF::Repeated {
|
||||
inner: Box::new(self.inner_parser.representation().production()),
|
||||
more_than_once: at_least >= 1,
|
||||
};
|
||||
Representation::new().with_production(production)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SeparatedBy<D, P, I, O, E>
|
||||
where
|
||||
D: Parser<I, (), E>,
|
||||
P: Parser<I, O, E>,
|
||||
E: Default,
|
||||
{
|
||||
inner_repeated: Repeated<P, I, O, E>,
|
||||
delimiter: D,
|
||||
allow_trailing: bool,
|
||||
}
|
||||
|
||||
impl<D, P, I, O, E> SeparatedBy<D, P, I, O, E>
|
||||
where
|
||||
D: Parser<I, (), E>,
|
||||
P: Parser<I, O, E>,
|
||||
E: Default,
|
||||
{
|
||||
pub fn allow_trailing(self, allow_trailing: bool) -> Self {
|
||||
Self {
|
||||
allow_trailing,
|
||||
..self
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<D, P, I, O, E> Parser<I, Vec<O>, E> for SeparatedBy<D, P, I, O, E>
|
||||
where
|
||||
D: Parser<I, (), E>,
|
||||
P: Parser<I, O, E>,
|
||||
E: Default,
|
||||
{
|
||||
fn parse(&self, mut input: I) -> ParseResult<I, Vec<O>, E> {
|
||||
let at_least = self.inner_repeated.at_least.unwrap_or(0);
|
||||
let at_most = self.inner_repeated.at_most.unwrap_or(u32::MAX);
|
||||
let inner = &self.inner_repeated.inner_parser;
|
||||
let delimiter = &self.delimiter;
|
||||
|
||||
if at_most == 0 {
|
||||
return Ok((vec![], input));
|
||||
}
|
||||
|
||||
let mut results = Vec::new();
|
||||
let mut count: u32 = 0;
|
||||
|
||||
match inner.parse(input) {
|
||||
Ok((item, rest)) => {
|
||||
results.push(item);
|
||||
input = rest;
|
||||
}
|
||||
Err((err, rest)) => {
|
||||
if at_least > 0 {
|
||||
return Err((err, rest));
|
||||
} else {
|
||||
return Ok((vec![], rest));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
loop {
|
||||
match delimiter.parse(input) {
|
||||
Ok(((), rest)) => {
|
||||
input = rest;
|
||||
count += 1;
|
||||
}
|
||||
Err((_err, rest)) => {
|
||||
input = rest;
|
||||
break;
|
||||
}
|
||||
}
|
||||
match inner.parse(input) {
|
||||
Ok((item, rest)) => {
|
||||
input = rest;
|
||||
results.push(item);
|
||||
}
|
||||
Err((err, rest)) => {
|
||||
if self.allow_trailing {
|
||||
input = rest;
|
||||
break;
|
||||
} else {
|
||||
return Err((err, rest));
|
||||
}
|
||||
}
|
||||
}
|
||||
if count >= at_most {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if count < at_least {
|
||||
//return Err(??, rest) <- need to handle errors better
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
Ok((results, input))
|
||||
}
|
||||
|
||||
fn representation(&self) -> Representation {
|
||||
let inner = &self.inner_repeated.inner_parser;
|
||||
let at_least = self.inner_repeated.at_least.unwrap_or(0);
|
||||
let inner_production = inner.representation().production();
|
||||
let delimiter_production = self.delimiter.representation().production();
|
||||
|
||||
let production = EBNF::Repeated {
|
||||
inner: Box::new(EBNF::Sequence(vec![inner_production, delimiter_production])),
|
||||
more_than_once: at_least >= 1,
|
||||
};
|
||||
Representation::new().with_production(production)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn optional<I, O, E>(parser: impl Parser<I, O, E>) -> impl Parser<I, Option<O>, E>
|
||||
where
|
||||
I: Clone,
|
||||
{
|
||||
move |input: I| match parser.parse(input.clone()) {
|
||||
Ok((output, rest)) => Ok((Some(output), rest)),
|
||||
Err(_e) => Ok((None, input)),
|
||||
}
|
||||
}
|
47
src/lib.rs
47
src/lib.rs
@ -1,35 +1,20 @@
|
||||
#![feature(assert_matches)]
|
||||
#![allow(dead_code)] //TODO eventually turn this off
|
||||
|
||||
|
||||
type ParseResult<I, O, E> = Result<(O, I), E>;
|
||||
|
||||
trait Parser<I, O, E> {
|
||||
fn parse(&self, input: I) -> ParseResult<I, O, E>;
|
||||
}
|
||||
|
||||
impl<I, O, E, F> Parser<I, O, E> for F where F: Fn(I) -> ParseResult<I, O, E> {
|
||||
fn parse(&self, input: I) -> ParseResult<I, O, E> {
|
||||
self(input)
|
||||
}
|
||||
}
|
||||
|
||||
fn literal(expected: &'static str) -> impl Fn(&str) -> ParseResult<&str, (), &str> {
|
||||
move |input| match input.get(0..expected.len()) {
|
||||
Some(next) if next == expected =>
|
||||
Ok(((), &input[expected.len()..])),
|
||||
_ => Err(input)
|
||||
}
|
||||
}
|
||||
mod annotated;
|
||||
mod choice;
|
||||
mod combinators;
|
||||
mod map;
|
||||
mod parser;
|
||||
mod primitives;
|
||||
mod representation;
|
||||
mod sequence;
|
||||
mod util;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::assert_matches::assert_matches;
|
||||
mod test;
|
||||
|
||||
#[test]
|
||||
fn parsing() {
|
||||
let output = literal("a")("a yolo");
|
||||
assert_matches!(output.unwrap(), ((), " yolo"));
|
||||
}
|
||||
}
|
||||
pub use choice::*;
|
||||
pub use combinators::*;
|
||||
pub use map::*;
|
||||
pub use parser::{ParseResult, Parser, ParserExtension};
|
||||
pub use primitives::*;
|
||||
pub use sequence::*;
|
||||
|
16
src/map.rs
Normal file
16
src/map.rs
Normal file
@ -0,0 +1,16 @@
|
||||
use crate::{representation::Representation, Parser, ParserExtension};
|
||||
|
||||
pub fn map<P, F, I, O1, O2, E>(parser: P, map_fn: F) -> impl Parser<I, O2, E>
|
||||
where
|
||||
P: Parser<I, O1, E>,
|
||||
F: Fn(O1) -> O2,
|
||||
{
|
||||
let production = parser.representation().production();
|
||||
(move |input| {
|
||||
parser
|
||||
.parse(input)
|
||||
.map(|(result, rest)| (map_fn(result), rest))
|
||||
})
|
||||
.to_anno()
|
||||
.with_repr(Representation::new().with_production(production))
|
||||
}
|
76
src/parser.rs
Normal file
76
src/parser.rs
Normal file
@ -0,0 +1,76 @@
|
||||
use crate::{annotated::AnnotatedParser, map, representation::Representation, seq2, surrounded_by};
|
||||
|
||||
pub type ParseResult<I, O, E> = Result<(O, I), (E, I)>;
|
||||
|
||||
pub trait Parser<I, O, E> {
|
||||
fn parse(&self, input: I) -> ParseResult<I, O, E>;
|
||||
fn name(&self) -> Option<String> {
|
||||
None
|
||||
}
|
||||
fn representation(&self) -> Representation {
|
||||
Representation::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, O, E, F> Parser<I, O, E> for F
|
||||
where
|
||||
F: Fn(I) -> ParseResult<I, O, E>,
|
||||
{
|
||||
fn parse(&self, input: I) -> ParseResult<I, O, E> {
|
||||
self(input)
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ParserExtension<I, O, E>: Parser<I, O, E> {
|
||||
fn map<F, O2>(self, map_fn: F) -> impl Parser<I, O2, E>
|
||||
where
|
||||
F: Fn(O) -> O2;
|
||||
|
||||
fn to<O2: Clone>(self, item: O2) -> impl Parser<I, O2, E>;
|
||||
fn then<O2, P: Parser<I, O2, E>>(self, next: P) -> impl Parser<I, (O, O2), E>;
|
||||
fn then_ignore<O2, P: Parser<I, O2, E>>(self, next: P) -> impl Parser<I, O, E>;
|
||||
fn ignore_then<O2, P: Parser<I, O2, E>>(self, next: P) -> impl Parser<I, O2, E>;
|
||||
fn surrounded_by<O2>(self, surrounding: impl Parser<I, O2, E>) -> impl Parser<I, O, E>;
|
||||
fn to_anno(self) -> AnnotatedParser<Self, I, O, E>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
AnnotatedParser::new(self)
|
||||
}
|
||||
fn to_named(self, name: &str) -> AnnotatedParser<Self, I, O, E>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
AnnotatedParser::new(self).with_name(name)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, I, O, E> ParserExtension<I, O, E> for T
|
||||
where
|
||||
T: Parser<I, O, E>,
|
||||
{
|
||||
fn map<F, O2>(self, map_fn: F) -> impl Parser<I, O2, E>
|
||||
where
|
||||
F: Fn(O) -> O2,
|
||||
{
|
||||
map(self, map_fn)
|
||||
}
|
||||
|
||||
fn to<O2: Clone>(self, item: O2) -> impl Parser<I, O2, E> {
|
||||
self.map(move |_| item.clone())
|
||||
}
|
||||
|
||||
fn then<O2, P: Parser<I, O2, E>>(self, next: P) -> impl Parser<I, (O, O2), E> {
|
||||
seq2(self, next)
|
||||
}
|
||||
|
||||
fn then_ignore<O2, P: Parser<I, O2, E>>(self, next: P) -> impl Parser<I, O, E> {
|
||||
seq2(self, next).map(|(this, _)| this)
|
||||
}
|
||||
fn ignore_then<O2, P: Parser<I, O2, E>>(self, next: P) -> impl Parser<I, O2, E> {
|
||||
seq2(self, next).map(|(_, next)| next)
|
||||
}
|
||||
fn surrounded_by<O2>(self, surrounding: impl Parser<I, O2, E>) -> impl Parser<I, O, E> {
|
||||
surrounded_by(self, surrounding)
|
||||
}
|
||||
}
|
110
src/primitives.rs
Normal file
110
src/primitives.rs
Normal file
@ -0,0 +1,110 @@
|
||||
use crate::{
|
||||
representation::{Representation, EBNF},
|
||||
ParseResult, Parser, ParserExtension,
|
||||
};
|
||||
|
||||
pub fn literal<'a>(expected: &'static str) -> impl Parser<&'a str, &'a str, ()> {
|
||||
let p = move |input: &'a str| match input.get(0..expected.len()) {
|
||||
Some(next) if next == expected => Ok((next, &input[expected.len()..])),
|
||||
_ => Err(((), input)),
|
||||
};
|
||||
let production = EBNF::StringTerminal(expected.into());
|
||||
p.to_anno()
|
||||
.with_repr(Representation::new().with_production(production))
|
||||
}
|
||||
|
||||
pub fn literal_char<'a>(expected: char) -> impl Parser<&'a str, char, ()> {
|
||||
(move |input: &'a str| match input.chars().next() {
|
||||
Some(ch) if ch == expected => Ok((expected, &input[ch.len_utf8()..])),
|
||||
_ => Err(((), input)),
|
||||
})
|
||||
.to_anno()
|
||||
.with_repr(Representation::new().with_production(EBNF::CharTerminal(expected)))
|
||||
}
|
||||
|
||||
pub fn one_of<'a>(items: &'static str) -> impl Parser<&'a str, char, ()> {
|
||||
(move |input: &'a str| {
|
||||
if let Some(ch) = input.chars().next() {
|
||||
if items.contains(ch) {
|
||||
let (_first, rest) = input.split_at(1);
|
||||
return Ok((ch, rest));
|
||||
}
|
||||
}
|
||||
Err(((), input))
|
||||
})
|
||||
.to_anno()
|
||||
.with_repr(
|
||||
Representation::new().with_production(EBNF::Alternation(
|
||||
items
|
||||
.chars()
|
||||
.map(|ch| EBNF::CharTerminal(ch))
|
||||
.collect::<Vec<_>>(),
|
||||
)),
|
||||
)
|
||||
}
|
||||
|
||||
/// Parses a standard identifier in a programming language
|
||||
pub fn identifier(input: &str) -> ParseResult<&str, String, ()> {
|
||||
let mut chars = input.chars();
|
||||
let mut buf = String::new();
|
||||
|
||||
match chars.next() {
|
||||
Some(ch) if ch.is_alphabetic() => buf.push(ch),
|
||||
_ => return Err(((), input)),
|
||||
}
|
||||
|
||||
for next in chars {
|
||||
if next.is_alphanumeric() {
|
||||
buf.push(next);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let next_index = buf.len();
|
||||
Ok((buf, &input[next_index..]))
|
||||
}
|
||||
|
||||
pub struct Whitespace;
|
||||
|
||||
impl Parser<&str, char, ()> for Whitespace {
|
||||
fn name(&self) -> Option<String> {
|
||||
Some("whitespace".into())
|
||||
}
|
||||
|
||||
fn representation(&self) -> Representation {
|
||||
Representation::new().with_production(EBNF::LabeledTerminal("whitespace".into()))
|
||||
}
|
||||
|
||||
fn parse<'a>(&self, input: &'a str) -> ParseResult<&'a str, char, ()> {
|
||||
match input.chars().next() {
|
||||
Some(ch) if ch.is_whitespace() => Ok((ch, &input[1..])),
|
||||
_ => Err(((), input)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks parsing and reported productions of `literal_char` and `one_of`.
    #[test]
    fn primitive_parsers() {
        let f_parser = literal_char('f');
        assert_eq!(Ok(('f', "unky")), f_parser.parse("funky"));
        assert!(matches!(
            f_parser.representation().production(),
            EBNF::CharTerminal('f')
        ));

        let production = one_of("asdf").representation().production();
        let lists_all_chars = match production {
            EBNF::Alternation(alternatives) => matches!(
                alternatives.as_slice(),
                [
                    EBNF::CharTerminal('a'),
                    EBNF::CharTerminal('s'),
                    EBNF::CharTerminal('d'),
                    EBNF::CharTerminal('f'),
                ]
            ),
            _ => false,
        };
        assert!(lists_all_chars);
    }
}
|
129
src/representation.rs
Normal file
129
src/representation.rs
Normal file
@ -0,0 +1,129 @@
|
||||
use std::fmt;
|
||||
|
||||
use crate::util::intersperse_option;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Representation {
|
||||
production_output: EBNF,
|
||||
}
|
||||
|
||||
impl Representation {
|
||||
pub fn show(&self) -> String {
|
||||
self.production_output.to_string()
|
||||
}
|
||||
|
||||
pub fn production(&self) -> EBNF {
|
||||
self.production_output.clone()
|
||||
}
|
||||
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
production_output: EBNF::None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_production(self, production_output: EBNF) -> Self {
|
||||
Self {
|
||||
production_output,
|
||||
..self
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A grammar production in an EBNF-like form.
#[derive(Debug, Clone)]
pub enum EBNF {
    /// No production is available.
    None,
    /// Reference to another rule by name.
    Nonterminal(String),
    /// A single literal character.
    CharTerminal(char),
    /// A literal string.
    StringTerminal(String),
    /// A terminal identified only by a label.
    LabeledTerminal(String),
    /// One of several alternatives.
    Alternation(Vec<EBNF>),
    /// Several productions one after another.
    Sequence(Vec<EBNF>),
    /// A repeated production.
    Repeated {
        inner: Box<EBNF>,
        more_than_once: bool,
    },
}

impl EBNF {
    /// Whether this production is multi-part: true for every alternation and
    /// for sequences of more than one item, false for everything else.
    fn needs_wrapping(&self) -> bool {
        match self {
            EBNF::Alternation(_) => true,
            EBNF::Sequence(items) => items.len() > 1,
            EBNF::None
            | EBNF::Nonterminal(_)
            | EBNF::CharTerminal(_)
            | EBNF::StringTerminal(_)
            | EBNF::LabeledTerminal(_)
            | EBNF::Repeated { .. } => false,
        }
    }
}
|
||||
|
||||
impl fmt::Display for EBNF {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
//TODO should try to show the name if possible
|
||||
EBNF::None => write!(f, "<no-representation>"),
|
||||
EBNF::CharTerminal(ch) => write!(f, "'{ch}'"),
|
||||
EBNF::Alternation(items) => {
|
||||
for item in intersperse_option(items.iter()) {
|
||||
match item {
|
||||
None => write!(f, " | ")?,
|
||||
Some(item) => write!(f, "{item}")?,
|
||||
}
|
||||
}
|
||||
write!(f, "")
|
||||
}
|
||||
EBNF::Nonterminal(name) => write!(f, "{name}"),
|
||||
EBNF::StringTerminal(term) => write!(f, r#""{term}""#),
|
||||
EBNF::LabeledTerminal(s) => write!(f, "<{s}>"),
|
||||
EBNF::Repeated {
|
||||
inner,
|
||||
more_than_once,
|
||||
} => {
|
||||
let sigil = if *more_than_once { '+' } else { '*' };
|
||||
if inner.needs_wrapping() {
|
||||
write!(f, "[{inner}]{sigil}")
|
||||
} else {
|
||||
write!(f, "{inner}{sigil}")
|
||||
}
|
||||
}
|
||||
EBNF::Sequence(items) => {
|
||||
for item in intersperse_option(items.iter()) {
|
||||
if let Some(item) = item {
|
||||
write!(f, "{item}")?;
|
||||
} else {
|
||||
write!(f, " ")?;
|
||||
}
|
||||
}
|
||||
write!(f, "")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the textual rendering of alternations.
    #[test]
    fn test_ebnf_print() {
        let only_chars = EBNF::Alternation("fake".chars().map(EBNF::CharTerminal).collect());
        assert_eq!(only_chars.to_string(), "'f' | 'a' | 'k' | 'e'");

        let mixed = EBNF::Alternation(vec![
            EBNF::Nonterminal(String::from("other-rule")),
            EBNF::CharTerminal('q'),
            EBNF::CharTerminal('m'),
            EBNF::StringTerminal(String::from("focus")),
        ]);
        assert_eq!(mixed.to_string(), "other-rule | 'q' | 'm' | \"focus\"");
    }
}
|
102
src/sequence.rs
Normal file
102
src/sequence.rs
Normal file
@ -0,0 +1,102 @@
|
||||
use crate::{
|
||||
representation::{Representation, EBNF},
|
||||
ParseResult, Parser, ParserExtension,
|
||||
};
|
||||
|
||||
pub fn sequence<S, I, O, E>(sequence: S) -> impl Parser<I, O, E>
|
||||
where
|
||||
S: Sequence<I, O, E>,
|
||||
{
|
||||
let repr = sequence.repr();
|
||||
|
||||
(move |input| -> ParseResult<I, O, E> { sequence.parse(input) })
|
||||
.to_anno()
|
||||
.with_repr(repr)
|
||||
}
|
||||
|
||||
pub fn surrounded_by<I, O1, O2, E>(
|
||||
main: impl Parser<I, O1, E>,
|
||||
surrounding: impl Parser<I, O2, E>,
|
||||
) -> impl Parser<I, O1, E> {
|
||||
let s_prod = surrounding.representation().production();
|
||||
let main_prod = main.representation().production();
|
||||
|
||||
(move |input| {
|
||||
let (_result1, rest1) = surrounding.parse(input)?;
|
||||
let (result2, rest2) = main.parse(rest1)?;
|
||||
let (_result3, rest3) = surrounding.parse(rest2)?;
|
||||
Ok((result2, rest3))
|
||||
})
|
||||
.to_anno()
|
||||
.with_repr(Representation::new().with_production(EBNF::Sequence(vec![
|
||||
s_prod.clone(),
|
||||
main_prod,
|
||||
s_prod,
|
||||
])))
|
||||
}
|
||||
|
||||
pub fn seq2<I, O1, O2, E>(
|
||||
first: impl Parser<I, O1, E>,
|
||||
second: impl Parser<I, O2, E>,
|
||||
) -> impl Parser<I, (O1, O2), E> {
|
||||
sequence((first, second))
|
||||
}
|
||||
|
||||
pub trait Sequence<I, O, E> {
|
||||
fn parse(&self, input: I) -> ParseResult<I, O, E>;
|
||||
fn repr(&self) -> Representation;
|
||||
}
|
||||
|
||||
impl<P1, P2, I, O1, O2, E> Sequence<I, (O1, O2), E> for (P1, P2)
|
||||
where
|
||||
P1: Parser<I, O1, E>,
|
||||
P2: Parser<I, O2, E>,
|
||||
{
|
||||
fn parse(&self, input: I) -> ParseResult<I, (O1, O2), E> {
|
||||
let p1 = &self.0;
|
||||
let p2 = &self.1;
|
||||
p1.parse(input).and_then(|(result1, rest)| {
|
||||
p2.parse(rest)
|
||||
.map(|(result2, rest2)| ((result1, result2), rest2))
|
||||
})
|
||||
}
|
||||
|
||||
fn repr(&self) -> Representation {
|
||||
let p1 = &self.0;
|
||||
let p2 = &self.1;
|
||||
Representation::new().with_production(EBNF::Sequence(vec![
|
||||
p1.representation().production(),
|
||||
p2.representation().production(),
|
||||
]))
|
||||
}
|
||||
}
|
||||
|
||||
impl<P1, P2, P3, I, O1, O2, O3, E> Sequence<I, (O1, O2, O3), E> for (P1, P2, P3)
|
||||
where
|
||||
P1: Parser<I, O1, E>,
|
||||
P2: Parser<I, O2, E>,
|
||||
P3: Parser<I, O3, E>,
|
||||
{
|
||||
fn parse(&self, input: I) -> ParseResult<I, (O1, O2, O3), E> {
|
||||
let p1 = &self.0;
|
||||
let p2 = &self.1;
|
||||
let p3 = &self.2;
|
||||
|
||||
let (result1, rest1) = p1.parse(input)?;
|
||||
let (result2, rest2) = p2.parse(rest1)?;
|
||||
let (result3, rest3) = p3.parse(rest2)?;
|
||||
|
||||
Ok(((result1, result2, result3), rest3))
|
||||
}
|
||||
|
||||
fn repr(&self) -> Representation {
|
||||
let p1 = &self.0;
|
||||
let p2 = &self.1;
|
||||
let p3 = &self.2;
|
||||
Representation::new().with_production(EBNF::Sequence(vec![
|
||||
p1.representation().production(),
|
||||
p2.representation().production(),
|
||||
p3.representation().production(),
|
||||
]))
|
||||
}
|
||||
}
|
98
src/test/mod.rs
Normal file
98
src/test/mod.rs
Normal file
@ -0,0 +1,98 @@
|
||||
mod sexp;
|
||||
|
||||
use super::*;
|
||||
|
||||
/// `literal` and a bare function both work as parsers.
#[test]
fn basic_parsing() {
    let (matched, remaining) = literal("a").parse("a yolo").unwrap();
    assert_eq!(matched, "a");
    assert_eq!(remaining, " yolo");

    fn bare_function_parser(input: &str) -> ParseResult<&str, i32, String> {
        let digit = match input.chars().next() {
            Some('0') => 0,
            Some('1') => 1,
            _ => return Err(("lol a parse error".to_string(), input)),
        };
        Ok((digit, &input[1..]))
    }

    assert_eq!(bare_function_parser.parse("0foo"), Ok((0, "foo")));
    assert_eq!(
        bare_function_parser.parse("xfoo"),
        Err(("lol a parse error".to_string(), "xfoo"))
    );
}
|
||||
|
||||
/// Nested `seq2` parsers produce nested tuples of outputs.
#[test]
fn test_sequence() {
    let tail = seq2(literal(" "), literal("jonzzz"));
    let parser = seq2(literal("bongo"), tail);
    let ((head, rest_of_words), remaining) = parser.parse("bongo jonzzz").unwrap();
    assert_eq!(head, "bongo");
    assert_eq!(rest_of_words, (" ", "jonzzz"));
    assert_eq!(remaining, "");
}
|
||||
|
||||
/// `choice` succeeds with the first alternative that matches.
#[test]
fn test_choice() {
    let alternatives = (literal("bongo"), literal("sucy"), literal("ara"));
    let parser = choice(alternatives);

    let parsed = parser.parse("ara hajimete").unwrap();
    assert_eq!(("ara", " hajimete"), parsed);
}
|
||||
|
||||
/// `map` transforms outputs and `to` replaces them with a fixed value.
#[test]
fn test_map() {
    let ab = seq2(literal("a"), literal("b"));
    let parser = ab.map(|(a, _b): (&str, &str)| (a.to_uppercase(), 59));
    let parsed = parser.parse("abcd").unwrap();
    assert_eq!((("A".to_owned(), 59), "cd"), parsed);

    let spaces = repeated(literal_char(' ')).at_least(1);
    let lute = seq2(literal("lute"), spaces).to(500);
    assert_eq!(lute.parse("lute "), Ok((500, "")));
    assert_eq!(lute.parse("lute"), Err(((), "")));
}
|
||||
|
||||
/// Method-style combinators (`to`, `then`) compose on top of `repeated`.
#[test]
fn test_combinators() {
    let many_a = repeated(literal_char('a')).to(10);
    let parser = many_a.then(literal_char('b'));
    let parsed = parser.parse("aaaaaaaabcd").unwrap();
    assert_eq!(((10, 'b'), "cd"), parsed);
}
|
||||
|
||||
/// `optional` yields `None` when absent and `Some` when present.
#[test]
fn test_optional() {
    let parser = seq2(
        optional(literal("alpha")),
        seq2(repeated(literal(" ")), literal("beta")),
    );

    let ((without_alpha, _), _) = parser.parse(" beta").unwrap();
    assert_eq!(without_alpha, None);

    let ((with_alpha, _), _) = parser.parse("alpha beta").unwrap();
    assert_eq!(with_alpha, Some("alpha"));
}
|
||||
|
||||
/// `repeated`, with and without a separator, parses and describes itself.
#[test]
fn test_repeated() {
    let spaces = repeated(literal_char(' ')).at_least(1);
    let separated = repeated(literal("bongo")).separated_by(map(spaces, |_| ()));
    let (items, _rest) = separated.parse("bongo bongo bongo bongo").unwrap();
    assert_eq!(items, vec!["bongo", "bongo", "bongo", "bongo"]);
    assert_eq!(separated.representation().show(), r#"["bongo" ' '+]*"#);

    let bongos = repeated(literal("bongo"));
    let (items, rest) = bongos.parse("tra la la").unwrap();
    assert_eq!(items.len(), 0);
    assert_eq!(rest, "tra la la");

    assert_eq!(bongos.representation().show(), r#""bongo"*"#);
}
|
||||
|
||||
/// `to_named` attaches a name that `name()` reports back.
#[test]
fn test_named_parser() {
    let named = literal("yokel").to_named("yokelparser");
    let reported = named.name();
    assert_eq!(reported, Some("yokelparser".to_string()));
}
|
103
src/test/sexp.rs
Normal file
103
src/test/sexp.rs
Normal file
@ -0,0 +1,103 @@
|
||||
use crate::*;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum Expr {
|
||||
Atom(Atom),
|
||||
List(Vec<Expr>),
|
||||
Quote(Vec<Expr>),
|
||||
}
|
||||
|
||||
/// A leaf value of an s-expression.
#[derive(Clone, Debug, PartialEq)]
enum Atom {
    /// Integer literal.
    Num(i64),
    /// Not constructed by any parser visible in this file.
    Str(String),
    /// Boolean literal, written `#t` or `#f`.
    Bool(bool),
    /// Identifier.
    Symbol(String),
}
|
||||
|
||||
fn parse_bool(input: &str) -> ParseResult<&str, Atom, ()> {
|
||||
choice((
|
||||
literal("#t").to(Atom::Bool(true)),
|
||||
literal("#f").to(Atom::Bool(false)),
|
||||
))
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
fn parse_symbol(input: &str) -> ParseResult<&str, Atom, ()> {
|
||||
identifier.map(Atom::Symbol).parse(input)
|
||||
}
|
||||
|
||||
fn parse_number(input: &str) -> ParseResult<&str, Atom, ()> {
|
||||
repeated(one_of("1234567890"))
|
||||
.at_least(1)
|
||||
.map(|n| Atom::Num(n.iter().collect::<String>().parse::<i64>().unwrap()))
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
fn parse_atom(input: &str) -> ParseResult<&str, Atom, ()> {
|
||||
choice((parse_symbol, parse_bool, parse_number)).parse(input)
|
||||
}
|
||||
|
||||
/// Booleans and numbers are recognised as atoms.
#[test]
fn test_parse_atom() {
    let (boolean, _rest) = parse_atom.parse("#t").unwrap();
    assert_eq!(boolean, Atom::Bool(true));

    let (number, _rest) = parse_atom.parse("384").unwrap();
    assert_eq!(number, Atom::Num(384));
}
|
||||
|
||||
fn parse_expr(input: &str) -> ParseResult<&str, Expr, ()> {
|
||||
choice((parse_list, parse_atom.map(Expr::Atom))).parse(input)
|
||||
}
|
||||
|
||||
fn parse_list(input: &str) -> ParseResult<&str, Expr, ()> {
|
||||
literal_char('(')
|
||||
.ignore_then(
|
||||
repeated(parse_expr)
|
||||
.separated_by(repeated(Whitespace).at_least(1).to(()))
|
||||
.allow_trailing(true),
|
||||
)
|
||||
.then_ignore(literal_char(')'))
|
||||
.map(Expr::List)
|
||||
.parse(input)
|
||||
}
|
||||
|
||||
/// A nested list is consumed completely.
#[test]
fn test_parse_list() {
    let (_list, remaining) = parse_list.parse("(1 2 (1 2) 9999 3)").unwrap();
    assert_eq!(remaining, "");
}
|
||||
|
||||
fn parse_sexp(input: &str) -> ParseResult<&str, Expr, ()> {
|
||||
parse_list.surrounded_by(repeated(Whitespace)).parse(input)
|
||||
}
|
||||
|
||||
/// Simple and multi-line s-expressions parse to completion.
#[test]
fn test_parse_sexp() {
    let (expr, remaining) = parse_expr("(add 1 2)").unwrap();
    let expected = Expr::List(vec![
        Expr::Atom(Atom::Symbol("add".to_string())),
        Expr::Atom(Atom::Num(1)),
        Expr::Atom(Atom::Num(2)),
    ]);
    assert_eq!(expr, expected);
    assert_eq!(remaining, "");

    let complex_input = r#"
(add (mul 28 9)
(if (eq a b) (jump #t) (hula 44))
)"#
    .trim();

    let (_expr, remaining) = parse_sexp(complex_input).unwrap();
    assert_eq!(remaining, "");
}
|
||||
|
||||
/// Ignored until representations can be propagated through function parsers.
#[ignore = "won't work until representations can be passed more easily around"]
#[test]
fn test_parse_sexp_repr() {
    let shown = parse_sexp.representation().show();
    assert_eq!(shown, r#"["bongo" ' '+]*"#);
}
|
46
src/util.rs
Normal file
46
src/util.rs
Normal file
@ -0,0 +1,46 @@
|
||||
use std::iter::Peekable;
|
||||
|
||||
pub(crate) fn intersperse_option<I: Iterator>(iterator: I) -> impl Iterator<Item = Option<I::Item>>
|
||||
where
|
||||
I::Item: Clone,
|
||||
{
|
||||
intersperse(iterator.map(Some), None)
|
||||
}
|
||||
|
||||
pub(crate) fn intersperse<I: Iterator>(iterator: I, separator: I::Item) -> Intersperse<I>
|
||||
where
|
||||
I::Item: Clone,
|
||||
{
|
||||
Intersperse {
|
||||
inner: iterator.peekable(),
|
||||
separator,
|
||||
needs_sep: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator adaptor that yields the items of an inner iterator with a
/// separator between each pair of neighbours.
pub struct Intersperse<I>
where
    I: Iterator,
{
    /// Source of items; peeked to find out whether another item follows.
    inner: Peekable<I>,
    /// Value cloned for every gap.
    separator: I::Item,
    /// True right after an item was handed out.
    needs_sep: bool,
}

impl<I> Iterator for Intersperse<I>
where
    I: Iterator,
    I::Item: Clone,
{
    type Item = I::Item;

    fn next(&mut self) -> Option<Self::Item> {
        // A separator is due only after an item and only if another item
        // follows, so nothing is emitted after the last one.
        let emit_separator = self.needs_sep && self.inner.peek().is_some();
        self.needs_sep = !emit_separator;

        if emit_separator {
            Some(self.separator.clone())
        } else {
            self.inner.next()
        }
    }
}
|
Loading…
Reference in New Issue
Block a user