From 6b47ecf2d70759458ceb7fe688822d9e58c407d7 Mon Sep 17 00:00:00 2001 From: greg Date: Sat, 8 Jun 2019 23:59:49 -0700 Subject: [PATCH] First pass at putting EBNF grammar into rustdoc --- schala-lang/language/src/lib.rs | 2 +- schala-lang/language/src/parsing.rs | 276 ++++++++++++++++------------ 2 files changed, 157 insertions(+), 121 deletions(-) diff --git a/schala-lang/language/src/lib.rs b/schala-lang/language/src/lib.rs index c38ecac..023c1e1 100644 --- a/schala-lang/language/src/lib.rs +++ b/schala-lang/language/src/lib.rs @@ -47,7 +47,7 @@ mod builtin; mod reduced_ast; mod eval; -/// All bits of state necessary to parse and execute a Schala program are stored in this struct. +/// All the state necessary to parse and execute a Schala program is stored in this struct. /// `state` represents the execution state for the AST-walking interpreter, the other fields /// should be self-explanatory. pub struct Schala { diff --git a/schala-lang/language/src/parsing.rs b/schala-lang/language/src/parsing.rs index 08bd58e..e8c1379 100644 --- a/schala-lang/language/src/parsing.rs +++ b/schala-lang/language/src/parsing.rs @@ -1,3 +1,151 @@ +//! # Parsing +//! This module is where the recursive-descent parsing methods live. +//! +//! +//! # Schala EBNF Grammar +//! Terminal productions are in "double quotes" or UPPERCASE if they are a class of tokens, +//! or otherwise not representable in ASCII. +//! +//! ## Top level structure +//! +//! ```text +//! program := (statement delimiter)* EOF +//! delimiter := NEWLINE | ";" +//! statement := expression | declaration +//! block := "{" (statement delimiter)* "}" +//! declaration := type_declaration | func_declaration | binding_declaration | impl_declaration +//! ``` +//! ## Declarations - Types +//! ```text +//! type_declaration := "type" type_declaration_body +//! type_declaration_body := "alias" type_alias | "mut"? type_singleton_name "=" type_body +//! type_alias := IDENTIFIER "=" type_name +//! 
type_body := variant_specifier ("|" variant_specifier)* +//! variant_specifier := IDENTIFIER | IDENTIFIER "{" typed_identifier_list "}" | IDENTIFIER "(" type_name* ")" +//! typed_identifier_list := typed_identifier* +//! typed_identifier := IDENTIFIER type_anno +//! ``` +//! ## Declaration - Functions +//! +//! ```text +//! func_declaration := func_signature func_body +//! func_body := ε | nonempty_func_body +//! nonempty_func_body := "{" (statement delimiter)* "}" +//! func_signature := "fn" func_name formal_param_list type_anno+ +//! func_name := IDENTIFIER | operator +//! formal_param_list := "(" (formal_param ",")* ")" +//! formal_param := IDENTIFIER type_anno+ +//! ``` +//! +//! ## Declaration - Variable bindings +//! ```binding_declaration := "let" "mut"? IDENTIFIER "=" expression``` +//! +//! ## Declaration - Interface +//! +//! ```text +//! interface_declaration := "interface" type_singleton_name signature_block +//! impl_declaration := "impl" type_singleton_name decl_block | "impl" type_singleton_name "for" type_name decl_block +//! decl_block := "{" (func_declaration)* "}" +//! signature_block := "{" (func_signature)* "}" +//! ``` +//! +//! ## Type Annotations +//! +//! ```text +//! type_anno := (":" type_name)+ +//! type_name := type_singleton_name | "(" type_names ")" +//! type_names := ε | type_name ("," type_name)* +//! type_singleton_name := IDENTIFIER (type_params)* +//! type_params := "<" type_name ("," type_name)* ">" +//! ``` +//! +//! ## Expressions +//! ```text +//! expression := precedence_expr type_anno+ +//! precedence_expr := prefix_expr +//! prefix_expr := prefix_op call_expr +//! prefix_op := "+" | "-" | "!" | "~" +//! call_expr := index_expr ( "(" expr_list ")" )* | ε +//! ``` +//! +//! ```text +//! expr_list := expression ("," expression)* | ε +//! index_expr := primary ( "[" (expression ("," (expression)* | ε) "]" )* +//! primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr +//! 
``` +//! +//! ## Primary expressions +//! +//! ```text +//! list_expr := "[" (expression, ",")* "]" +//! lambda_expr := "\\" lambda_param_list type_anno+ nonempty_func_body +//! lambda_param_list := formal_param_list | formal_param +//! paren_expr := LParen paren_inner RParen +//! paren_inner := (expression ",")* +//! identifier_expr := named_struct | IDENTIFIER +//! ``` +//! +//! Expression literals +//! ```text +//! literal := "true" | "false" | number_literal | STR_LITERAL +//! named_struct := IDENTIFIER record_block +//! record_block := "{" (record_entry, ",")* | "}" //TODO support anonymous structs, update syntax +//! record_entry := IDENTIFIER ":" expression +//! anonymous_struct := TODO +//! ``` +//! +//! A `float_literal` can still be assigned to an int in type-checking +//! ```text +//! number_literal := int_literal | float_literal +//! int_literal := ("0x" | "0b") digits +//! float_literal := digits ("." digits) +//! digits := (DIGIT_GROUP underscore)+ +//! ``` +//! +//! ## Patterns +//! ```text +//! pattern := "(" (pattern, ",")* ")" | simple_pattern +//! simple_pattern := pattern_literal | record_pattern | tuple_struct_pattern +//! pattern_literal := "true" | "false" | signed_number_literal | STR_LITERAL | IDENTIFIER +//! signed_number_literal := "-"? number_literal +//! record_pattern := IDENTIFIER "{" (record_pattern_entry, ",")* "}" +//! record_pattern_entry := IDENTIFIER | IDENTIFIER ":" pattern +//! tuple_struct_pattern := IDENTIFIER "(" (pattern, ",")* ")" +//! ``` +//! +//! ```text +//! expr_or_block := "{" (statement delimiter)* "}" | expression +//! ``` +//! +//! ## If-expressions +//! ```text +//! if_expr := "if" discriminator ("then" conditional | "is" simple_pattern_match | guard_block) +//! discriminator := precedence_expr (operator)+ +//! conditional := expr_or_block else_clause +//! simple_pattern_match := pattern "then" conditional +//! else_clause := ε | "else" expr_or_block +//! guard_block := "{" (guard_arm, ",")* "}" +//! 
guard_arm := guard "->" expr_or_block +//! guard := "is" pattern | (operator)+ precedence_expr +//! ``` +//! +//! ## While expressions +//! ```text +//! while_expr := "while" while_cond "{" (statement delimiter)* "}" +//! while_cond := ε | expression | expression "is" pattern //TODO maybe is-expressions should be primary +//! ``` +//! +//! //TODO this implies there must be at least one enumerator, which the parser doesn't support right +//! //this second, and maybe should fail later anyway +//! ## For-expressions +//! ```text +//! for_expr := "for" (enumerator | "{" enumerators "}") for_expr_body +//! for_expr_body := "return" expression | "{" (statement delimiter)* "}" +//! enumerators := enumerator ("," enumerators)* +//! enumerator := identifier "<-" expression | identifier "=" expression //TODO add guards, etc. +//! ``` + use std::rc::Rc; use std::iter::Peekable; use std::vec::IntoIter; @@ -10,6 +158,7 @@ use crate::ast::*; use crate::builtin::{BinOp, PrefixOp}; +/// Represents a parsing error #[derive(Debug)] pub struct ParseError { pub msg: String, @@ -22,6 +171,7 @@ impl ParseError { } } +/// Represents either a successful parsing result or a ParseError pub type ParseResult = Result; #[derive(Debug)] @@ -31,6 +181,7 @@ pub struct ParseRecord { level: u32, } +/// Main data structure for doing parsing. pub struct Parser { token_handler: TokenHandler, parse_record: Vec, @@ -69,6 +220,7 @@ impl TokenHandler { } impl Parser { + /// Create a new parser initialized with some tokens. pub fn new(initial_input: Vec) -> Parser { Parser { token_handler: TokenHandler::new(initial_input), @@ -78,6 +230,7 @@ impl Parser { } } + /// Parse all loaded tokens up to this point. pub fn parse(&mut self) -> ParseResult { self.program() } @@ -155,128 +308,10 @@ macro_rules! 
delimited { }; } -/* Schala EBNF Grammar */ -/* Terminal productions are in 'single quotes' or UPPERCASE if they are a class - * or not representable in ASCII - -/* Top-level Structure */ - -program := (statement delimiter)* EOF -delimiter := NEWLINE | ';' -statement := expression | declaration -block := '{' (statement delimiter)* '}' - -declaration := type_declaration | func_declaration | binding_declaration | impl_declaration - -/* Declarations - Types */ - -type_declaration := 'type' type_declaration_body -type_declaration_body := 'alias' type_alias | 'mut'? type_singleton_name '=' type_body -type_alias := IDENTIFIER '=' type_name -type_body := variant_specifier ('|' variant_specifier)* -variant_specifier := IDENTIFIER | IDENTIFIER '{' typed_identifier_list '}' | IDENTIFIER '(' type_name* ')' -typed_identifier_list := typed_identifier* -typed_identifier := IDENTIFIER type_anno - -/* Declaration - Functions */ - -func_declaration := func_signature func_body -func_body := ε | nonempty_func_body -nonempty_func_body := '{' (statement delimiter)* '}' -func_signature := 'fn' func_name formal_param_list type_anno+ -func_name := IDENTIFIER | operator -formal_param_list := '(' (formal_param ',')* ')' -formal_param := IDENTIFIER type_anno+ - -/* Declaration - Variable bindings */ -binding_declaration := 'let' 'mut'? 
IDENTIFIER '=' expresion - -/* Declaration - Interface */ - -interface_declaration := 'interface' type_singleton_name signature_block -impl_declaration := 'impl' type_singleton_name decl_block | 'impl' type_singleton_name 'for' type_name decl_block -decl_block := '{' (func_declaration)* '}' -signature_block := '{' (func_signature)* '}' - -/* Type annotations */ - -type_anno := (':' type_name)+ -type_name := type_singleton_name | '(' type_names ')' -type_names := ε | type_name (, type_name)* -type_singleton_name = IDENTIFIER (type_params)* -type_params := '<' type_name (, type_name)* '>' - - -/* Expressions */ - -expression := precedence_expr type_anno+ -precedence_expr := prefix_expr -prefix_expr := prefix_op call_expr -prefix_op := '+' | '-' | '!' | '~' -call_expr := index_expr ( '(' expr_list ')' )* | ε - -expr_list := expression (',' expression)* | ε -index_expr := primary ( '[' (expression (',' (expression)* | ε) ']' )* -primary := literal | paren_expr | if_expr | for_expr | while_expr | identifier_expr | lambda_expr | anonymous_struct | list_expr - -/* Primary Expressions */ - -list_expr := '[' (expression, ',')* ']' -lambda_expr := '\' lambda_param_list type_anno+ nonempty_func_body -lambda_param_list := formal_param_list | formal_param -paren_expr := LParen paren_inner RParen -paren_inner := (expression ',')* -identifier_expr := named_struct | IDENTIFIER - -/* Expression - Literals */ - -literal := 'true' | 'false' | number_literal | STR_LITERAL -named_struct := IDENTIFIER record_block -record_block := '{' (record_entry, ',')* | '}' //TODO support anonymus structs, update syntax -record_entry := IDENTIFIER ':' expression -anonymous_struct := TODO - -// a float_literal can still be assigned to an int in type-checking -number_literal := int_literal | float_literal -int_literal = ('0x' | '0b') digits -float_literal := digits ('.' 
digits) -digits := (DIGIT_GROUP underscore)+ - -/* Pattern syntax */ -pattern := '(' (pattern, ',')* ')' | simple_pattern -simple_pattern := pattern_literal | record_pattern | tuple_struct_pattern -pattern_literal := 'true' | 'false' | signed_number_literal | STR_LITERAL | IDENTIFIER -signed_number_literal := '-'? number_literal -record_pattern := IDENTIFIER '{' (record_pattern_entry, ',')* '}' -record_pattern_entry := IDENTIFIER | IDENTIFIER ':' Pattern -tuple_struct_pattern := IDENTIFIER '(' (pattern, ',')* ')' - -expr_or_block := '{' (statement delimiter)* '}' | expr - -/* Expression - If */ -if_expr := 'if' discriminator ('then' condititional | 'is' simple_pattern_match | guard_block) -discriminator := precedence_expr (operator)+ -conditional := expr_or_block else_clause -simple_pattern_match := pattern 'then' conditional -else_clause := ε | 'else' expr_or_block -guard_block := '{' (guard_arm, ',')* '}' -guard_arm := guard '->' expr_or_block -guard := 'is' pattern | (operator)+ precedence_expr - -/* Expression - While */ -while_expr := 'while' while_cond '{' (statement delimiter)* '}' -while_cond := ε | expression | expression 'is' pattern //TODO maybe is-expresions should be primary - -//TODO this implies there must be at least one enumerator, which the parser doesn't support right -//this second, and maybe should fail later anyway -/* Expression - For */ -for_expr := 'for' (enumerator | '{' enumerators '}') for_expr_body -for_expr_body := 'return' expression | '{' (statement delimiter)* '} -enumerators := enumerator (',' enumerators)* -enumerator := identifier '<-' expression | identifier '=' expression //TODO add guards, etc. 
-*/ impl Parser { + /// `program := (statement delimiter)* EOF` + /// `delimiter := NEWLINE | ';'` #[recursive_descent_method] fn program(&mut self) -> ParseResult { let mut statements = Vec::new(); @@ -295,6 +330,7 @@ impl Parser { Ok(AST(statements)) } + /// `statement := expression | declaration` #[recursive_descent_method] fn statement(&mut self) -> ParseResult { //TODO handle error recovery here