Compare commits

..

2 Commits

Author SHA1 Message Date
Greg Shuflin
96393604c3 Move parser crate items around 2021-11-14 03:55:35 -08:00
Greg Shuflin
29207876ae Get rid of old BNF grammar in doc comments
Eventually add this back in some form
2021-11-14 03:49:57 -08:00
5 changed files with 37 additions and 202 deletions

View File

@ -1,178 +1,38 @@
#![allow(clippy::upper_case_acronyms)] #![allow(clippy::upper_case_acronyms)]
//! # Parsing mod new;
//! This module is where the recursive-descent parsing methods live.
//!
//!
//! # Schala EBNF Grammar
//! This document is the authoritative grammar of Schala, represented in something approximating
//! Extended Backus-Naur form. Terminal productions are in "double quotes", or UPPERCASE
//! if they represent a class of tokens rather than a specific string, or are otherwise
//! unrepresentable in ASCII.
//!
//! ## Top level structure
//!
//! ```text
//! program := (statement delimiter)* EOF
//! delimiter := NEWLINE | ";"
//! statement := expression | declaration | import | flow
//! block := "{" (statement delimiter)* "}"
//! declaration := annotation? declaration | bare_declaration
//! bare_declaration := type_declaration | func_declaration | binding_declaration | impl_declaration | module
//! ```
//! ## Declarations
//!
//! ### Types
//! ```text
//! type_declaration := "type" type_declaration_body
//! type_declaration_body := "alias" type_alias | "mut"? type_singleton_name "=" type_body
//! type_alias := IDENTIFIER "=" type_name
//! type_body := immediate_record | (variant_specifier ("|" variant_specifier)*)
//! immediate_record := "{" typed_identifier_list "}"
//! variant_specifier := IDENTIFIER | IDENTIFIER "{" typed_identifier_list "}" | IDENTIFIER "(" type_name* ")"
//! typed_identifier_list := typed_identifier*
//! typed_identifier := IDENTIFIER type_anno
//! ```
//! ### Functions
//!
//! ```text
//! func_declaration := func_signature func_body
//! func_body := ε | nonempty_func_body
//! nonempty_func_body := "{" (statement delimiter)* "}"
//! func_signature := "fn" func_name formal_param_list type_anno+
//! func_name := IDENTIFIER | operator
//! formal_param_list := "(" (formal_param ",")* ")"
//! formal_param := IDENTIFIER type_anno+
//! ```
//!
//! ### Variable bindings
//! ```text
//! binding_declaration := "let" "mut"? IDENTIFIER "=" expression
//! ```
//!
//! ### Interfaces
//!
//! ```text
//! interface_declaration := "interface" type_singleton_name signature_block
//! impl_declaration := "impl" type_singleton_name decl_block | "impl" type_singleton_name "for" type_name decl_block
//! decl_block := "{" (func_declaration)* "}"
//! signature_block := "{" (func_signature)* "}"
//! ```
//!
//! ### Type Annotations
//!
//! ```text
//! type_anno := ":" type_name
//! type_name := type_singleton_name | "(" type_names ")"
//! type_names := ε | type_name ("," type_name)*
//! type_singleton_name := IDENTIFIER (type_params)*
//! type_params := "<" type_name ("," type_name)* ">"
//! ```
//!
//! ## Expressions
//! ```text
//! expression := precedence_expr type_anno?
//! precedence_expr := prefix_expr
//! prefix_expr := prefix_op extended_expr
//!
//! prefix_op := "+" | "-" | "!" | "~" | ε
//!
//! extended_expr := primary (ε | index | call | access)*
//! index := "[" (expression ("," expression)*) "]"
//! call := "(" invocation_list ")"
//! access := "." identifier
//! primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
//! expr_or_block := "{" (statement delimiter)* "}" | expr
//!
//! invocation_list := invocation_argument ("," invocation_argument)* | ε
//! invocation_argument := expression | IDENTIFIER "=" expression | "_"
//! ```
//! //TODO fix expressions, add accessor_expr as a new thing
//!
//! ### Primary expressions
//!
//! ```text
//! list_expr := "[" (expression, ",")* "]"
//! lambda_expr := "\\" lambda_param_list type_anno? nonempty_func_body
//! lambda_param_list := formal_param_list | formal_param
//! paren_expr := "(" paren_inner ")"
//! paren_inner := (expression ",")*
//! identifier_expr := qualified_identifier | named_struct
//! qualified_identifier := IDENTIFIER ("::" IDENTIFIER)*
//! ```
//!
//! ## Literals
//! ```text
//! literal := "true" | "false" | number_literal | STR_LITERAL
//! named_struct := qualified_identifier record_block
//! record_block := "{" (record_entry, ",")* "}" //TODO support anonymous structs, update syntax
//! record_entry := IDENTIFIER ":" expression
//! anonymous_struct := TODO
//! number_literal := int_literal | float_literal
//! int_literal := ("0x" | "0b") digits
//! float_literal := digits ("." digits)
//! digits := (digit_group underscore*)+
//! digit_group := DIGIT+
//! ```
//!
//! ### Patterns
//! ```text
//! pattern := "(" (pattern, ",")* ")" | simple_pattern
//! simple_pattern := pattern_literal | record_pattern | tuple_struct_pattern
//! pattern_literal := "true" | "false" | signed_number_literal | STR_LITERAL | qualified_identifier
//! signed_number_literal := "-"? number_literal
//! record_pattern := qualified_identifier "{" (record_pattern_entry, ",")* "}"
//! record_pattern_entry := IDENTIFIER | IDENTIFIER ":" pattern
//! tuple_struct_pattern := qualified_identifier "(" (pattern, ",")* ")"
//! ```
//! ### If expressions
//!
//! TODO: it would be nice if the grammar could capture an incomplete precedence expr in the
//! discriminator
//!
//! ```text
//! if_expr := "if" discriminator if_expr_body
//! if_expr_body := ("then" simple_conditional | "is" simple_pattern_match | cond_block)
//! discriminator := ε | expression
//! simple_conditional := expr_or_block else_case
//! simple_pattern_match := pattern "then" simple_conditional
//! else_case := "else" expr_or_block
//!
//! cond_block := "{" (cond_arm comma_or_delimiter)* "}"
//! cond_arm := condition guard "then" expr_or_block | "else" expr_or_block
//! condition := "is" pattern | operator precedence_expr | expression
//! guard := "if" expression
//! comma_or_delimiter := "," | delimiter
//! ```
//!
//!
//! ### While expressions
//! ```text
//! while_expr := "while" while_cond "{" (statement delimiter)* "}"
//! while_cond := ε | expression | expression "is" pattern //TODO maybe is-expressions should be primary
//! ```
//!
//! //TODO this implies there must be at least one enumerator, which the parser doesn't support right
//! //this second, and maybe should fail later anyway
//! ### For-expressions
//! ```text
//! for_expr := "for" (enumerator | "{" enumerators "}") for_expr_body
//! for_expr_body := "return" expression | "{" (statement delimiter)* "}"
//! enumerators := enumerator ("," enumerators)*
//! enumerator := identifier "<-" expression | identifier "=" expression //TODO add guards, etc.
//! ```
//! ## Imports
//! ```text
//! import := 'import' IDENTIFIER (:: IDENTIFIER)* import_suffix
//! import_suffix := ε | '::{' IDENTIFIER (, IDENTIFIER)* '}' | '*' //TODO add qualified, exclusions, etc.
//! ```
//!
//! ## Modules
//!
//! ```text
//! module := 'module' IDENTIFIER '{' statement* '}'
//! ```
pub mod new;
mod test; mod test;
use std::fmt; use std::fmt;
use crate::{
ast::{ASTItem, AST},
identifier::{Id, IdStore},
};
/// Parser state that is passed into the grammar rules; it owns the store from which ids for
/// AST items are drawn.
pub struct Parser {
// Store that produces `Id<ASTItem>` values (see `Parser::fresh`).
id_store: IdStore<ASTItem>,
}
impl Parser {
/// Creates a parser backed by a newly constructed `IdStore`.
pub(crate) fn new() -> Self {
Self { id_store: IdStore::new() }
}
/// Parses `input` by invoking the `program` rule of `new::schala_parser`, handing `self` to
/// the rule as its parser argument.
///
/// # Errors
/// A peg parse failure is converted into this module's `ParseError`: `msg` is the peg error
/// rendered via `to_string()`, and `location` is converted from the error's `location.offset`.
pub(crate) fn parse(&mut self, input: &str) -> Result<AST, ParseError> {
use peg::str::LineCol;
new::schala_parser::program(input, self).map_err(|err: peg::error::ParseError<LineCol>| {
let msg = err.to_string();
ParseError { msg, location: err.location.offset.into() }
})
}
/// Returns a fresh `Id<ASTItem>` obtained from the internal `IdStore`.
fn fresh(&mut self) -> Id<ASTItem> {
self.id_store.fresh()
}
}
/// Represents a parsing error /// Represents a parsing error
#[derive(Debug)] #[derive(Debug)]
pub struct ParseError { pub struct ParseError {

View File

@ -1,40 +1,14 @@
use std::rc::Rc; use std::rc::Rc;
use super::Parser;
//TODO make use of the format_parse_error function //TODO make use of the format_parse_error function
//use crate::error::{SchalaError, format_parse_error}; //use crate::error::{SchalaError, format_parse_error};
use crate::{ use crate::ast::*;
ast::*,
identifier::{Id, IdStore},
parsing::ParseError,
};
fn rc_string(s: &str) -> Rc<String> { fn rc_string(s: &str) -> Rc<String> {
Rc::new(s.to_string()) Rc::new(s.to_string())
} }
/// Parser state that is passed into the grammar rules; it owns the store from which ids for
/// AST items are drawn.
pub struct Parser {
// Store that produces `Id<ASTItem>` values (see `Parser::fresh`).
id_store: IdStore<ASTItem>,
}
impl Parser {
/// Creates a parser backed by a newly constructed `IdStore`.
pub(crate) fn new() -> Self {
Self { id_store: IdStore::new() }
}
/// Parses `input` by invoking the `program` rule of `schala_parser`, handing `self` to the
/// rule as its parser argument.
///
/// # Errors
/// A peg parse failure is converted into a `ParseError`: `msg` is the peg error rendered via
/// `to_string()`, and `location` is converted from the error's `location.offset`.
pub(crate) fn parse(&mut self, input: &str) -> Result<AST, ParseError> {
use peg::str::LineCol;
schala_parser::program(input, self).map_err(|err: peg::error::ParseError<LineCol>| {
let msg = err.to_string();
ParseError { msg, location: err.location.offset.into() }
})
}
/// Returns a fresh `Id<ASTItem>` obtained from the internal `IdStore`.
fn fresh(&mut self) -> Id<ASTItem> {
self.id_store.fresh()
}
}
enum ExtendedPart<'a> { enum ExtendedPart<'a> {
Index(Vec<Expression>), Index(Vec<Expression>),
Accessor(&'a str), Accessor(&'a str),
@ -83,6 +57,7 @@ peg::parser! {
"break" { FlowControl::Break } / "break" { FlowControl::Break } /
"return" _ expr:expression(parser)? { FlowControl::Return(expr) } "return" _ expr:expression(parser)? { FlowControl::Return(expr) }
//TODO add the ability to rename and exclude imports
rule import(parser: &mut Parser) -> ImportSpecifier = rule import(parser: &mut Parser) -> ImportSpecifier =
"import" _ path_components:path_components() suffix:import_suffix()? { "import" _ path_components:path_components() suffix:import_suffix()? {
ImportSpecifier { ImportSpecifier {
@ -351,7 +326,7 @@ peg::parser! {
} }
//TODO anonymous structs, update syntax for structs //TODO support anonymous structs and Elm-style update syntax for structs
rule record_block(parser: &mut Parser) -> Vec<(&'input str, Expression)> = rule record_block(parser: &mut Parser) -> Vec<(&'input str, Expression)> =
"{" _ entries:(record_entry(parser) ** ",") _ "}" { entries } "{" _ entries:(record_entry(parser) ** ",") _ "}" { entries }

View File

@ -6,7 +6,7 @@ use std::{fmt::Write, rc::Rc};
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
use super::new::{schala_parser, Parser}; use super::{new::schala_parser, Parser};
use crate::{ast::*, parsing::Location}; use crate::{ast::*, parsing::Location};
fn rc(s: &str) -> Rc<String> { fn rc(s: &str) -> Rc<String> {

View File

@ -17,7 +17,7 @@ pub struct Schala<'a> {
/// Contains information for type-checking /// Contains information for type-checking
type_context: type_inference::TypeContext, type_context: type_inference::TypeContext,
/// Schala Parser /// Schala Parser
active_parser: parsing::new::Parser, active_parser: parsing::Parser,
/// Execution state for AST-walking interpreter /// Execution state for AST-walking interpreter
eval_state: tree_walk_eval::State<'a>, eval_state: tree_walk_eval::State<'a>,
@ -45,7 +45,7 @@ impl<'a> Schala<'a> {
source_reference: SourceReference::new(), source_reference: SourceReference::new(),
symbol_table: symbol_table::SymbolTable::new(), symbol_table: symbol_table::SymbolTable::new(),
type_context: type_inference::TypeContext::new(), type_context: type_inference::TypeContext::new(),
active_parser: parsing::new::Parser::new(), active_parser: parsing::Parser::new(),
eval_state: tree_walk_eval::State::new(), eval_state: tree_walk_eval::State::new(),
timings: Vec::new(), timings: Vec::new(),
} }

View File

@ -52,7 +52,7 @@ where T: Hash + Eq
/// Quickly create an AST from a string, with no error checking. For test use only /// Quickly create an AST from a string, with no error checking. For test use only
#[cfg(test)] #[cfg(test)]
pub fn quick_ast(input: &str) -> crate::ast::AST { pub fn quick_ast(input: &str) -> crate::ast::AST {
let mut parser = crate::parsing::new::Parser::new(); let mut parser = crate::parsing::Parser::new();
let output = parser.parse(input); let output = parser.parse(input);
output.unwrap() output.unwrap()
} }