schala/schala-lang/src/parsing/mod.rs

199 lines
7.0 KiB
Rust

#![allow(clippy::upper_case_acronyms)]
//! # Parsing
//! This module is where the recursive-descent parsing methods live.
//!
//!
//! # Schala EBNF Grammar
//! This document is the authoritative grammar of Schala, represented in something approximating
//! Extended Backus-Naur form. Terminal productions are in "double quotes", or UPPERCASE
//! if they represent a class of tokens rather than a specific string, or are otherwise
//! unrepresentable in ASCII.
//!
//! ## Top level structure
//!
//! ```text
//! program := (statement delimiter)* EOF
//! delimiter := NEWLINE | ";"
//! statement := expression | declaration | import | flow
//! block := "{" (statement delimiter)* "}"
//! declaration := annotation? declaration | bare_declaration
//! bare_declaration := type_declaration | func_declaration | binding_declaration | impl_declaration | module
//! ```
//! ## Declarations
//!
//! ### Types
//! ```text
//! type_declaration := "type" type_declaration_body
//! type_declaration_body := "alias" type_alias | "mut"? type_singleton_name "=" type_body
//! type_alias := IDENTIFIER "=" type_name
//! type_body := immediate_record | (variant_specifier ("|" variant_specifier)*)
//! immediate_record := "{" typed_identifier_list "}"
//! variant_specifier := IDENTIFIER | IDENTIFIER "{" typed_identifier_list "}" | IDENTIFIER "(" type_name* ")"
//! typed_identifier_list := typed_identifier*
//! typed_identifier := IDENTIFIER type_anno
//! ```
//! ### Functions
//!
//! ```text
//! func_declaration := func_signature func_body
//! func_body := ε | nonempty_func_body
//! nonempty_func_body := "{" (statement delimiter)* "}"
//! func_signature := "fn" func_name formal_param_list type_anno+
//! func_name := IDENTIFIER | operator
//! formal_param_list := "(" (formal_param ",")* ")"
//! formal_param := IDENTIFIER type_anno+
//! ```
//!
//! ### Variable bindings
//! ```text
//! binding_declaration := "let" "mut"? IDENTIFIER "=" expression
//! ```
//!
//! ### Interfaces
//!
//! ```text
//! interface_declaration := "interface" type_singleton_name signature_block
//! impl_declaration := "impl" type_singleton_name decl_block | "impl" type_singleton_name "for" type_name decl_block
//! decl_block := "{" (func_declaration)* "}"
//! signature_block := "{" (func_signature)* "}"
//! ```
//!
//! ### Type Annotations
//!
//! ```text
//! type_anno := ":" type_name
//! type_name := type_singleton_name | "(" type_names ")"
//! type_names := ε | type_name ("," type_name)*
//! type_singleton_name := IDENTIFIER (type_params)*
//! type_params := "<" type_name ("," type_name)* ">"
//! ```
//!
//! ## Expressions
//! ```text
//! expression := precedence_expr type_anno?
//! precedence_expr := prefix_expr
//! prefix_expr := prefix_op extended_expr
//!
//! prefix_op := "+" | "-" | "!" | "~" | ε
//!
//! extended_expr := primary (ε | index | call | access)*
//! index := "[" (expression ("," expression)*) "]"
//! call := "(" invocation_list ")"
//! access := "." identifier
//! primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr
//! expr_or_block := "{" (statement delimiter)* "}" | expr
//!
//! invocation_list := invocation_argument ("," invocation_argument)* | ε
//! invocation_argument := expression | IDENTIFIER "=" expression | "_"
//! ```
//! //TODO fix expressions, add accessor_expr as a new thing
//!
//! ### Primary expressions
//!
//! ```text
//! list_expr := "[" (expression, ",")* "]"
//! lambda_expr := "\\" lambda_param_list type_anno? nonempty_func_body
//! lambda_param_list := formal_param_list | formal_param
//! paren_expr := "(" paren_inner ")"
//! paren_inner := (expression ",")*
//! identifier_expr := qualified_identifier | named_struct
//! qualified_identifier := IDENTIFIER ("::" IDENTIFIER)*
//! ```
//!
//! ## Literals
//! ```text
//! literal := "true" | "false" | number_literal | STR_LITERAL
//! named_struct := qualified_identifier record_block
//! record_block := "{" (record_entry, ",")* "}" //TODO support anonymous structs, update syntax
//! record_entry := IDENTIFIER ":" expression
//! anonymous_struct := TODO
//! number_literal := int_literal | float_literal
//! int_literal := ("0x" | "0b") digits
//! float_literal := digits ("." digits)
//! digits := (digit_group underscore*)+
//! digit_group := DIGIT+
//! ```
//!
//! ### Patterns
//! ```text
//! pattern := "(" (pattern, ",")* ")" | simple_pattern
//! simple_pattern := pattern_literal | record_pattern | tuple_struct_pattern
//! pattern_literal := "true" | "false" | signed_number_literal | STR_LITERAL | qualified_identifier
//! signed_number_literal := "-"? number_literal
//! record_pattern := qualified_identifier "{" (record_pattern_entry, ",")* "}"
//! record_pattern_entry := IDENTIFIER | IDENTIFIER ":" pattern
//! tuple_struct_pattern := qualified_identifier "(" (pattern, ",")* ")"
//! ```
//! ### If expressions
//!
//! TODO: it would be nice if the grammar could capture an incomplete precedence expr in the
//! discriminator
//!
//! ```text
//! if_expr := "if" discriminator if_expr_body
//! if_expr_body := ("then" simple_conditional | "is" simple_pattern_match | cond_block)
//! discriminator := ε | expression
//! simple_conditional := expr_or_block else_case
//! simple_pattern_match := pattern "then" simple_conditional
//! else_case := "else" expr_or_block
//!
//! cond_block := "{" (cond_arm comma_or_delimiter)* "}"
//! cond_arm := condition guard "then" expr_or_block | "else" expr_or_block
//! condition := "is" pattern | operator precedence_expr | expression
//! guard := "if" expression
//! comma_or_delimiter := "," | delimiter
//! ```
//!
//!
//! ### While expressions
//! ```text
//! while_expr := "while" while_cond "{" (statement delimiter)* "}"
//! while_cond := ε | expression | expression "is" pattern //TODO maybe is-expressions should be primary
//! ```
//!
//! //TODO this implies there must be at least one enumerator, which the parser doesn't support right
//! //this second, and maybe should fail later anyway
//! ### For-expressions
//! ```text
//! for_expr := "for" (enumerator | "{" enumerators "}") for_expr_body
//! for_expr_body := "return" expression | "{" (statement delimiter)* "}"
//! enumerators := enumerator ("," enumerators)*
//! enumerator := identifier "<-" expression | identifier "=" expression //TODO add guards, etc.
//! ```
//! ## Imports
//! ```text
//! import := "import" IDENTIFIER ("::" IDENTIFIER)* import_suffix
//! import_suffix := ε | "::{" IDENTIFIER ("," IDENTIFIER)* "}" | "*" //TODO add qualified, exclusions, etc.
//! ```
//!
//! ## Modules
//!
//! ```text
//! module := "module" IDENTIFIER "{" statement* "}"
//! ```
pub mod new;
mod test;
use std::fmt;
/// An error produced while parsing.
///
/// Pairs a textual message with the [`Location`] the error is attached to.
#[derive(Debug)]
pub struct ParseError {
    /// Text describing the error (presumably human-readable; confirm against
    /// the code that constructs these).
    pub msg: String,
    /// Source location recorded for this error.
    pub location: Location,
}
/// A position in the source text, stored as a single `usize` offset.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` because equality on a
/// plain `usize` is total; this lets a `Location` be used as a key in
/// `HashMap`/`HashSet` and in other contexts that require `Eq`.
/// The `Default` value has an offset of `0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Location {
    /// Offset into the source. NOTE(review): whether this counts bytes or
    /// characters is not visible from this definition — confirm with the lexer.
    pub(crate) offset: usize,
}
impl From<usize> for Location {
fn from(offset: usize) -> Self {
Self { offset }
}
}
/// Displays a [`Location`] as its numeric offset.
impl fmt::Display for Location {
    /// Writes the offset using `usize`'s own `Display` implementation.
    ///
    /// Delegating (rather than using `write!(f, "{}", ..)`) forwards the
    /// caller's formatter, so width, fill and alignment flags such as
    /// `{:>5}` are honoured. Plain `{}` output is unchanged.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.offset, f)
    }
}