Compare commits

...

45 Commits

Author SHA1 Message Date
Greg Shuflin
94ee3e1897 Delete a bunch of now-obsolete parsing/tokenizing code 2021-11-14 03:18:05 -08:00
Greg Shuflin
05e1555a9b Run rustfmt 2021-11-14 03:07:58 -08:00
Greg Shuflin
4b0aced11f Swap over parser 2021-11-14 02:43:48 -08:00
Greg Shuflin
205ab7179d Rewrites to prepare for parser swap 2021-11-14 02:15:08 -08:00
Greg Shuflin
abab667c43 Fix all tests 2021-11-14 01:57:53 -08:00
Greg Shuflin
869de8c033 Thread Parser through peg rules to provide ID's 2021-11-13 22:39:15 -08:00
Greg Shuflin
ba8fb86e3f Create new Parser wrapper type 2021-11-13 20:22:29 -08:00
Greg Shuflin
a00125d4a5 Make string literals work properly 2021-11-13 13:47:10 -08:00
Greg Shuflin
a93fc48ee8 Make anciliary parsing functions not panic 2021-11-13 13:33:15 -08:00
Greg Shuflin
8fe7fca88c Fill out all reserved words 2021-11-13 13:23:21 -08:00
Greg Shuflin
6cd5a9353c Line comments 2021-11-13 13:18:02 -08:00
Greg Shuflin
671ce54dd3 Get rid of "2" in parse test macros 2021-11-13 01:42:49 -08:00
Greg Shuflin
c67adc3a38 Block comments 2021-11-13 01:41:17 -08:00
Greg Shuflin
13353f8801 Get rid of left-recursion cached rule 2021-11-12 21:41:15 -08:00
Greg Shuflin
10ea99e95c rewrite source reference to use raw offsets 2021-11-12 02:06:19 -08:00
Greg Shuflin
fa736f2dd4 Make Location only track offset into source string 2021-11-12 01:18:57 -08:00
Greg Shuflin
b7f796322b Add offset to Location type 2021-11-12 01:14:03 -08:00
Greg Shuflin
f9349edf77 run rustfmt 2021-11-12 01:13:13 -08:00
Greg Shuflin
c5f7616303 Finish porting over parsing rules 2021-11-12 00:32:11 -08:00
Greg Shuflin
5af42d0828 Flow control 2021-11-12 00:27:06 -08:00
Greg Shuflin
92c6d7f311 imports 2021-11-12 00:06:42 -08:00
Greg Shuflin
e618498881 Modules 2021-11-11 23:44:26 -08:00
Greg Shuflin
a31735da88 Block syntax fixes 2021-11-11 23:42:22 -08:00
Greg Shuflin
96d12f3659 Take care of all expression tests 2021-11-11 22:24:35 -08:00
Greg Shuflin
c3d36ab320 Interface, impls 2021-11-11 22:00:04 -08:00
Greg Shuflin
7bd6072dae More work 2021-11-11 20:56:39 -08:00
Greg Shuflin
08a4800175 Fix newlines 2021-11-11 19:36:48 -08:00
Greg Shuflin
8d7f8f555f One more test 2021-11-11 19:02:10 -08:00
Greg Shuflin
fbb0269623 Type annos 2021-11-11 19:01:16 -08:00
Greg Shuflin
8c48f63a2d Lambdas 2021-11-11 02:42:14 -08:00
Greg Shuflin
54b33282ef More declarations 2021-11-07 03:13:35 -08:00
Greg Shuflin
d46f40bc0f type declarations 2021-11-07 01:30:26 -08:00
Greg Shuflin
02fc76c8fc Fix issue with block formatting 2021-11-06 20:34:35 -07:00
Greg Shuflin
87141fcca3 for expressions 2021-11-05 12:52:41 -07:00
Greg Shuflin
4ec2585d25 Paramaterize struct-related functions 2021-11-05 03:18:28 -07:00
Greg Shuflin
8aa306746a While-parsing passing
Albeit with a lot of code duplication
2021-11-05 03:08:25 -07:00
Greg Shuflin
4f3ef5c850 Fix prefix-expr bug 2021-11-05 02:46:58 -07:00
Greg Shuflin
76b1e9c0dc More types of expr 2021-11-05 02:43:34 -07:00
Greg Shuflin
6a318257d6 If exprs, patterns 2021-11-04 21:11:19 -07:00
Greg Shuflin
8e19b7c39d Precedence 2021-11-03 23:57:22 -07:00
Greg Shuflin
54eb8252a9 Add operator rule 2021-11-03 22:27:14 -07:00
Greg Shuflin
6cbe562241 More work 2021-11-03 20:48:31 -07:00
Greg Shuflin
88b39b5561 Started porting extant tests to new peg parser 2021-11-03 20:19:55 -07:00
Greg Shuflin
359f274f33 More trying out peg 2021-11-03 18:01:23 -07:00
Greg Shuflin
4c1ee0a34e Trying out peg 2021-11-03 16:27:42 -07:00
21 changed files with 877 additions and 1911 deletions

25
Cargo.lock generated
View File

@ -558,6 +558,30 @@ dependencies = [
"winapi 0.3.8",
]
[[package]]
name = "peg"
version = "0.7.0"
source = "git+https://github.com/kevinmehall/rust-peg?rev=960222580c8da25b17d32c2aae6f52f902728b62#960222580c8da25b17d32c2aae6f52f902728b62"
dependencies = [
"peg-macros",
"peg-runtime",
]
[[package]]
name = "peg-macros"
version = "0.7.0"
source = "git+https://github.com/kevinmehall/rust-peg?rev=960222580c8da25b17d32c2aae6f52f902728b62#960222580c8da25b17d32c2aae6f52f902728b62"
dependencies = [
"peg-runtime",
"proc-macro2 1.0.30",
"quote 1.0.10",
]
[[package]]
name = "peg-runtime"
version = "0.7.0"
source = "git+https://github.com/kevinmehall/rust-peg?rev=960222580c8da25b17d32c2aae6f52f902728b62#960222580c8da25b17d32c2aae6f52f902728b62"
[[package]]
name = "phf"
version = "0.7.24"
@ -895,6 +919,7 @@ dependencies = [
"ena",
"failure",
"itertools",
"peg",
"pretty_assertions",
"radix_trie",
"schala-lang-codegen",

View File

@ -34,7 +34,6 @@ impl Fold for RecursiveDescentFn {
}
result.map_err(|mut parse_error: ParseError| {
parse_error.production_name = Some(stringify!(#ident).to_string());
parse_error
})
}

View File

@ -14,6 +14,9 @@ derivative = "1.0.3"
colored = "1.8"
radix_trie = "0.1.5"
assert_matches = "1.5"
#peg = "0.7.0"
peg = { git = "https://github.com/kevinmehall/rust-peg", rev = "960222580c8da25b17d32c2aae6f52f902728b62" }
schala-lang-codegen = { path = "../codegen" }
schala-repl = { path = "../../schala-repl" }

View File

@ -14,8 +14,8 @@ fn getline(arg) { }
fn map(input: Option<T>, func: Func): Option<T> {
if input {
is Option::Some(x) then Option::Some(func(x)),
is Option::None then Option::None,
is Option::Some(x) then Option::Some(func(x))
is Option::None then Option::None
}
}

View File

@ -17,7 +17,7 @@ pub use visitor::*;
use crate::{
derivative::Derivative,
identifier::{define_id_kind, Id},
tokenizing::Location,
parsing::Location,
};
define_id_kind!(ASTItem);
@ -197,6 +197,7 @@ pub struct TypeSingletonName {
pub enum ExpressionKind {
NatLiteral(u64),
FloatLiteral(f64),
//TODO StringLiteral variant needs to support prefixes
StringLiteral(Rc<String>),
BoolLiteral(bool),
BinExp(BinOp, Box<Expression>, Box<Expression>),
@ -239,7 +240,7 @@ pub struct ConditionArm {
pub enum Condition {
Pattern(Pattern),
TruncatedOp(BinOp, Expression),
Expression(Expression),
//Expression(Expression), //I'm pretty sure I don't actually want this
Else,
}
@ -262,7 +263,7 @@ pub enum PatternLiteral {
#[derive(Debug, PartialEq, Clone)]
pub struct Enumerator {
pub id: Rc<String>,
pub id: Rc<String>, //TODO rename this field
pub generator: Expression,
}

View File

@ -1,7 +1,5 @@
use std::rc::Rc;
use crate::tokenizing::TokenKind;
#[derive(Debug, PartialEq, Clone)]
pub struct PrefixOp {
sigil: Rc<String>,
@ -15,10 +13,6 @@ impl PrefixOp {
pub fn sigil(&self) -> &str {
&self.sigil
}
pub fn is_prefix(op: &str) -> bool {
matches!(op, "+" | "-" | "!")
}
}
#[derive(Debug, PartialEq, Clone)]
@ -35,34 +29,14 @@ impl BinOp {
&self.sigil
}
pub fn from_sigil_token(tok: &TokenKind) -> Option<BinOp> {
let s = token_kind_to_sigil(tok)?;
Some(BinOp::from_sigil(s))
}
pub fn min_precedence() -> i32 {
i32::min_value()
}
pub fn get_precedence_from_token(op_tok: &TokenKind) -> Option<i32> {
let s = token_kind_to_sigil(op_tok)?;
Some(binop_precedences(s))
pub fn get_precedence(&self) -> i32 {
binop_precedences(self.sigil.as_ref())
}
}
fn token_kind_to_sigil(tok: &TokenKind) -> Option<&str> {
use self::TokenKind::*;
Some(match tok {
Operator(op) => op.as_str(),
Period => ".",
Pipe => "|",
Slash => "/",
LAngleBracket => "<",
RAngleBracket => ">",
Equals => "=",
_ => return None,
})
}
fn binop_precedences(s: &str) -> i32 {
let default = 10_000_000;
match s {

View File

@ -166,9 +166,6 @@ pub fn walk_if_expr_body<V: ASTVisitor>(v: &mut V, body: &IfExpressionBody) {
Condition::TruncatedOp(ref _binop, ref expr) => {
walk_expression(v, expr);
}
Condition::Expression(ref expr) => {
walk_expression(v, expr);
}
Condition::Else => (),
}
if let Some(ref guard) = arm.guard {

View File

@ -1,8 +1,7 @@
use crate::{
parsing::ParseError,
parsing::{Location, ParseError},
schala::{SourceReference, Stage},
symbol_table::SymbolError,
tokenizing::{Location, Token, TokenKind},
type_inference::TypeError,
};
@ -52,26 +51,6 @@ impl SchalaError {
errors: vec![],
}
}
pub(crate) fn from_tokens(tokens: &[Token]) -> Option<SchalaError> {
let token_errors: Vec<Error> = tokens
.iter()
.filter_map(|tok| match tok.kind {
TokenKind::Error(ref err) => Some(Error {
location: Some(tok.location),
text: Some(err.clone()),
stage: Stage::Tokenizing,
}),
_ => None,
})
.collect();
if token_errors.is_empty() {
None
} else {
Some(SchalaError { errors: token_errors, formatted_parse_error: None })
}
}
}
#[allow(dead_code)]
@ -82,22 +61,18 @@ struct Error {
}
fn format_parse_error(error: ParseError, source_reference: &SourceReference) -> String {
let line_num = error.token.location.line_num;
let ch = error.token.location.char_num;
let line_from_program = source_reference.get_line(line_num as usize);
let location_pointer = format!("{}^", " ".repeat(ch.into()));
let offset = error.location.offset;
let (line_start, line_num, line_from_program) = source_reference.get_line(offset);
let ch = offset - line_start;
let location_pointer = format!("{}^", " ".repeat(ch));
let line_num_digits = format!("{}", line_num).chars().count();
let space_padding = " ".repeat(line_num_digits);
let production = match error.production_name {
Some(n) => format!("\n(from production \"{}\")", n),
None => "".to_string(),
};
format!(
r#"
{error_msg}{production}
{error_msg}
{space_padding} |
{line_num} | {}
{space_padding} | {}
@ -107,6 +82,5 @@ fn format_parse_error(error: ParseError, source_reference: &SourceReference) ->
error_msg = error.msg,
space_padding = space_padding,
line_num = line_num,
production = production
)
}

View File

@ -7,7 +7,6 @@
//! `ProgrammingLanguageInterface` and the chain of compiler passes for it.
extern crate schala_repl;
#[macro_use]
extern crate schala_lang_codegen;
extern crate derivative;
@ -19,7 +18,6 @@ mod type_inference;
mod ast;
mod parsing;
mod tokenizing;
#[macro_use]
mod symbol_table;
mod builtin;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,592 @@
use std::rc::Rc;
//TODO make use of the format_parse_error function
//use crate::error::{SchalaError, format_parse_error};
use crate::{
ast::*,
identifier::{Id, IdStore},
parsing::ParseError,
};
fn rc_string(s: &str) -> Rc<String> {
Rc::new(s.to_string())
}
pub struct Parser {
id_store: IdStore<ASTItem>,
}
impl Parser {
pub(crate) fn new() -> Self {
Self { id_store: IdStore::new() }
}
pub(crate) fn parse(&mut self, input: &str) -> Result<AST, ParseError> {
use peg::str::LineCol;
schala_parser::program(input, self).map_err(|err: peg::error::ParseError<LineCol>| {
let msg = err.to_string();
ParseError {
msg,
location: err.location.offset.into(),
}
})
}
fn fresh(&mut self) -> Id<ASTItem> {
self.id_store.fresh()
}
}
enum ExtendedPart<'a> {
Index(Vec<Expression>),
Accessor(&'a str),
Call(Vec<InvocationArgument>),
}
peg::parser! {
pub grammar schala_parser() for str {
rule whitespace() = [' ' | '\t' ]
rule whitespace_or_newline() = [' ' | '\t' | '\n' ]
rule _ = quiet!{ (block_comment() / line_comment() / whitespace())* }
rule __ = quiet!{ (block_comment() / line_comment() / whitespace_or_newline())* }
rule block_comment() = "/*" (block_comment() / !"*/" [_])* "*/"
rule line_comment() = "//" (!['\n'] [_])* &"\n"
pub rule program(parser: &mut Parser) -> AST =
__ statements:(statement(parser) ** (delimiter()+) ) __ { AST { id: parser.fresh(), statements: statements.into() } }
rule delimiter() = (";" / "\n")+
//Note - this is a hack, ideally the rule `rule block() -> Block = "{" _ items:(statement() **
//delimiter()) _ "}" { items.into() }` would've worked, but it doesn't.
pub rule block(parser: &mut Parser) -> Block =
"{" __ items:block_item(parser)* __ "}" { items.into() } /
"{" __ stmt:statement(parser) __ "}" { vec![stmt].into() }
rule block_item(parser: &mut Parser) -> Statement =
_ stmt:statement(parser) _ delimiter()+ { stmt }
rule statement(parser: &mut Parser) -> Statement =
_ pos:position!() kind:statement_kind(parser) _ { Statement { id: parser.fresh(), location: pos.into(), kind } }
rule statement_kind(parser: &mut Parser) -> StatementKind =
__ import:import(parser) { StatementKind::Import(import) } /
__ decl:declaration(parser) { StatementKind::Declaration(decl) } /
__ flow:flow(parser) { StatementKind::Flow(flow) } /
__ expr:expression(parser) { StatementKind::Expression(expr) }
rule flow(parser: &mut Parser) -> FlowControl =
"continue" { FlowControl::Continue } /
"break" { FlowControl::Break } /
"return" _ expr:expression(parser)? { FlowControl::Return(expr) }
rule import(parser: &mut Parser) -> ImportSpecifier =
"import" _ path_components:path_components() suffix:import_suffix()? {
ImportSpecifier {
id: parser.fresh(),
path_components,
imported_names: suffix.unwrap_or_else(|| ImportedNames::LastOfPath)
}
}
rule path_components() -> Vec<Rc<String>> =
"::"? name:identifier() rest:path_component()* {
let mut items = vec![rc_string(name)];
items.extend(rest.into_iter().map(|n| rc_string(n)));
items
}
rule path_component() -> &'input str = "::" ident:identifier() { ident }
rule import_suffix() -> ImportedNames =
"::*" { ImportedNames::All } /
"::{" __ names:(identifier() ** (_ "," _)) __ "}" { ImportedNames::List(names.into_iter().map(rc_string).collect()) }
rule declaration(parser: &mut Parser) -> Declaration =
binding(parser) / type_decl(parser) / annotation(parser) / func(parser) / interface(parser) /
implementation(parser) / module(parser)
rule module(parser: &mut Parser) -> Declaration =
"module" _ name:identifier() _ items:block(parser) { Declaration::Module { name: rc_string(name), items } }
rule implementation(parser: &mut Parser) -> Declaration =
"impl" _ interface:type_singleton_name() _ "for" _ type_name:type_identifier() _ block:decl_block(parser) {
Declaration::Impl { type_name, interface_name: Some(interface), block }
} /
"impl" _ type_name:type_identifier() _ block:decl_block(parser) {
Declaration::Impl { type_name, interface_name: None, block }
}
rule decl_block(parser: &mut Parser) -> Vec<Declaration> =
"{" __ decls:(func_declaration(parser) ** (delimiter()+)) __ "}" { decls }
rule interface(parser: &mut Parser) -> Declaration =
"interface" _ name:identifier() _ signatures:signature_block(parser) { Declaration::Interface { name: rc_string(name), signatures } }
rule signature_block(parser: &mut Parser) -> Vec<Signature> =
"{" __ signatures:(func_signature(parser) ** (delimiter()+)) __ "}" { signatures }
rule func(parser: &mut Parser) -> Declaration =
decl:func_declaration(parser) { decl } /
sig:func_signature(parser) { Declaration::FuncSig(sig) }
rule func_declaration(parser: &mut Parser) -> Declaration =
_ sig:func_signature(parser) __ body:block(parser) { Declaration::FuncDecl(sig, body) }
//TODO handle operators
rule func_signature(parser: &mut Parser) -> Signature =
_ "fn" _ name:identifier() "(" _ params:formal_params(parser) _ ")" _ type_anno:type_anno()? { Signature {
name: rc_string(name), operator: false, params, type_anno
} }
rule formal_params(parser: &mut Parser) -> Vec<FormalParam> =
params:(formal_param(parser) ** (_ "," _)) {? if params.len() < 256 { Ok(params) } else {
Err("function-too-long") }
}
rule formal_param(parser: &mut Parser) -> FormalParam =
name:identifier() _ anno:type_anno()? _ "=" expr:expression(parser) { FormalParam { name: rc_string(name),
default: Some(expr), anno } } /
name:identifier() _ anno:type_anno()? { FormalParam { name: rc_string(name), default: None, anno } }
rule annotation(parser: &mut Parser) -> Declaration =
"@" name:identifier() args:annotation_args(parser)? delimiter()+ _ inner:statement(parser) { Declaration::Annotation {
name: rc_string(name), arguments: if let Some(args) = args { args } else { vec![] }, inner: Box::new(inner) }
}
rule annotation_args(parser: &mut Parser) -> Vec<Expression> =
"(" _ args:(expression(parser) ** (_ "," _)) _ ")" { args }
rule binding(parser: &mut Parser) -> Declaration =
"let" _ mutable:"mut"? _ ident:identifier() _ type_anno:type_anno()? _ "=" _ expr:expression(parser) {
Declaration::Binding { name: Rc::new(ident.to_string()), constant: mutable.is_none(),
type_anno, expr }
}
rule type_decl(parser: &mut Parser) -> Declaration =
"type" _ "alias" _ alias:type_alias() { alias } /
"type" _ mutable:"mut"? _ name:type_singleton_name() _ "=" _ body:type_body(parser) {
Declaration::TypeDecl { name, body, mutable: mutable.is_some() }
}
rule type_singleton_name() -> TypeSingletonName =
name:identifier() params:type_params()? { TypeSingletonName {
name: rc_string(name), params: if let Some(params) = params { params } else { vec![] }
} }
rule type_params() -> Vec<TypeIdentifier> =
"<" _ idents:(type_identifier() ** (_ "," _)) _ ">" { idents }
rule type_identifier() -> TypeIdentifier =
"(" _ items:(type_identifier() ** (_ "," _)) _ ")" { TypeIdentifier::Tuple(items) } /
singleton:type_singleton_name() { TypeIdentifier::Singleton(singleton) }
rule type_body(parser: &mut Parser) -> TypeBody =
"{" _ items:(record_variant_item() ** (__ "," __)) __ "}" { TypeBody::ImmediateRecord(parser.fresh(), items) } /
variants:(variant_spec(parser) ** (__ "|" __)) { TypeBody::Variants(variants) }
rule variant_spec(parser: &mut Parser) -> Variant =
name:identifier() __ "{" __ typed_identifier_list:(record_variant_item() ** (__ "," __)) __ ","? __ "}" { Variant {
id: parser.fresh(), name: rc_string(name), kind: VariantKind::Record(typed_identifier_list)
} } /
name:identifier() "(" tuple_members:(type_identifier() ++ (__ "," __)) ")" { Variant {
id: parser.fresh(), name: rc_string(name), kind: VariantKind::TupleStruct(tuple_members) } } /
name:identifier() { Variant { id: parser.fresh(), name: rc_string(name), kind: VariantKind::UnitStruct } }
rule record_variant_item() -> (Rc<String>, TypeIdentifier) =
name:identifier() _ ":" _ ty:type_identifier() { (rc_string(name), ty) }
rule type_alias() -> Declaration =
alias:identifier() _ "=" _ name:identifier() { Declaration::TypeAlias { alias: rc_string(alias), original: rc_string(name), } }
rule type_anno() -> TypeIdentifier =
":" _ identifier:type_identifier() { identifier }
pub rule expression(parser: &mut Parser) -> Expression =
__ kind:expression_kind(true, parser) _ type_anno:type_anno()? { Expression { id: parser.fresh(), type_anno, kind } }
rule expression_no_struct(parser: &mut Parser) -> Expression =
__ kind:expression_kind(false, parser) { Expression { id: parser.fresh(), type_anno: None, kind: kind } }
rule expression_kind(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
precedence_expr(struct_ok, parser)
rule precedence_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
first:prefix_expr(struct_ok, parser) _ next:(precedence_continuation(struct_ok, parser))* {
let next = next.into_iter().map(|(sigil, expr)| (BinOp::from_sigil(sigil), expr)).collect();
BinopSequence { first, next }.do_precedence(parser)
}
rule precedence_continuation(struct_ok: bool, parser: &mut Parser) -> (&'input str, ExpressionKind) =
op:operator() _ expr:prefix_expr(struct_ok, parser) _ { (op, expr) }
rule prefix_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
prefix:prefix()? expr:extended_expr(struct_ok, parser) {
if let Some(p) = prefix {
let expr = Expression::new(parser.fresh(), expr);
let prefix = PrefixOp::from_sigil(p);
ExpressionKind::PrefixExp(prefix, Box::new(expr))
} else {
expr
}
}
rule prefix() -> &'input str =
$(['+' | '-' | '!' ])
//TODO make the definition of operators more complex
rule operator() -> &'input str =
quiet!{!"*/" s:$( ['+' | '-' | '*' | '/' | '%' | '<' | '>' | '=' | '!' | '$' | '&' | '|' | '?' | '^' | '`']+ ) { s } } /
expected!("operator")
rule extended_expr(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
primary:primary(struct_ok, parser) parts:(extended_expr_part(parser)*) {
let mut expression = Expression::new(parser.fresh(), primary);
for part in parts.into_iter() {
let kind = match part {
ExtendedPart::Index(indexers) => {
ExpressionKind::Index { indexee: Box::new(expression), indexers }
},
ExtendedPart::Accessor(name) => {
let name = rc_string(name);
ExpressionKind::Access { name, expr: Box::new(expression) }
},
ExtendedPart::Call(arguments) => {
ExpressionKind::Call { f: Box::new(expression), arguments }
}
};
expression = Expression::new(parser.fresh(), kind);
}
expression.kind
}
rule extended_expr_part(parser: &mut Parser) -> ExtendedPart<'input> =
indexers:index_part(parser) { ExtendedPart::Index(indexers) } /
arguments:call_part(parser) { ExtendedPart::Call(arguments) } /
"." name:identifier() { ExtendedPart::Accessor(name) }
rule index_part(parser: &mut Parser) -> Vec<Expression> =
"[" indexers:(expression(parser) ++ ",") "]" { indexers }
rule call_part(parser: &mut Parser) -> Vec<InvocationArgument> =
"(" arguments:(invocation_argument(parser) ** ",") ")" { arguments }
//TODO this shouldn't be an expression b/c type annotations disallowed here
rule invocation_argument(parser: &mut Parser) -> InvocationArgument =
_ "_" _ { InvocationArgument::Ignored } /
_ ident:identifier() _ "=" _ expr:expression(parser) { InvocationArgument::Keyword {
name: Rc::new(ident.to_string()),
expr
} } /
_ expr:expression(parser) _ { InvocationArgument::Positional(expr) }
rule primary(struct_ok: bool, parser: &mut Parser) -> ExpressionKind =
while_expr(parser) / for_expr(parser) / float_literal() / nat_literal() / bool_literal() /
string_literal() / paren_expr(parser) /
list_expr(parser) / if_expr(parser) / lambda_expr(parser) /
item:named_struct(parser) {? if struct_ok { Ok(item) } else { Err("no-struct-allowed") } } /
identifier_expr(parser)
rule lambda_expr(parser: &mut Parser) -> ExpressionKind =
r#"\"# __ "(" _ params:formal_params(parser) _ ")" _ type_anno:(type_anno()?) _ body:block(parser) {
ExpressionKind::Lambda { params, type_anno, body }
} /
r#"\"# param:formal_param(parser) _ type_anno:(type_anno()?) _ body:block(parser) {
ExpressionKind::Lambda { params: vec![param], type_anno, body }
}
rule for_expr(parser: &mut Parser) -> ExpressionKind =
"for" _ enumerators:for_enumerators(parser) _ body:for_body(parser) {
ExpressionKind::ForExpression { enumerators, body }
}
rule for_enumerators(parser: &mut Parser) -> Vec<Enumerator> =
"{" _ enumerators:(enumerator(parser) ++ ",") _ "}" { enumerators } /
enumerator:enumerator(parser) { vec![enumerator] }
//TODO add guards, etc.
rule enumerator(parser: &mut Parser) -> Enumerator =
ident:identifier() _ "<-" _ generator:expression_no_struct(parser) {
Enumerator { id: Rc::new(ident.to_string()), generator }
} /
//TODO need to distinguish these two cases in AST
ident:identifier() _ "=" _ generator:expression_no_struct(parser) {
Enumerator { id: Rc::new(ident.to_string()), generator }
}
rule for_body(parser: &mut Parser) -> Box<ForBody> =
"return" _ expr:expression(parser) { Box::new(ForBody::MonadicReturn(expr)) } /
body:block(parser) { Box::new(ForBody::StatementBlock(body)) }
rule while_expr(parser: &mut Parser) -> ExpressionKind =
"while" _ cond:expression_kind(false, parser)? _ body:block(parser) {
ExpressionKind::WhileExpression {
condition: cond.map(|kind| Box::new(Expression::new(parser.fresh(), kind))),
body,
}
}
rule identifier_expr(parser: &mut Parser) -> ExpressionKind =
qn:qualified_identifier(parser) { ExpressionKind::Value(qn) }
rule named_struct(parser: &mut Parser) -> ExpressionKind =
name:qualified_identifier(parser) _ fields:record_block(parser) {
ExpressionKind::NamedStruct {
name,
fields: fields.into_iter().map(|(n, exp)| (Rc::new(n.to_string()), exp)).collect(),
}
}
//TODO anonymous structs, update syntax for structs
rule record_block(parser: &mut Parser) -> Vec<(&'input str, Expression)> =
"{" _ entries:(record_entry(parser) ** ",") _ "}" { entries }
rule record_entry(parser: &mut Parser) -> (&'input str, Expression) =
_ name:identifier() _ ":" _ expr:expression(parser) _ { (name, expr) }
rule qualified_identifier(parser: &mut Parser) -> QualifiedName =
names:(identifier() ++ "::") { QualifiedName { id: parser.fresh(), components: names.into_iter().map(|name| Rc::new(name.to_string())).collect() } }
//TODO improve the definition of identifiers
rule identifier() -> &'input str =
!(reserved() !(ident_continuation())) text:$(['a'..='z' | 'A'..='Z' | '_'] ident_continuation()*) { text }
rule ident_continuation() -> &'input str =
text:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_'])
rule reserved() = "if" / "then" / "else" / "is" / "fn" / "for" / "while" / "let" / "in" / "mut" / "return" /
"break" / "alias" / "type" / "self" / "Self" / "interface" / "impl" / "true" / "false" / "module" / "import"
rule if_expr(parser: &mut Parser) -> ExpressionKind =
"if" _ discriminator:(expression(parser)?) _ body:if_expr_body(parser) {
ExpressionKind::IfExpression {
discriminator: discriminator.map(Box::new),
body: Box::new(body),
}
}
rule if_expr_body(parser: &mut Parser) -> IfExpressionBody =
cond_block(parser) / simple_pattern_match(parser) / simple_conditional(parser)
rule simple_conditional(parser: &mut Parser) -> IfExpressionBody =
"then" _ then_case:expr_or_block(parser) _ else_case:else_case(parser) {
IfExpressionBody::SimpleConditional { then_case, else_case }
}
rule simple_pattern_match(parser: &mut Parser) -> IfExpressionBody =
"is" _ pattern:pattern(parser) _ "then" _ then_case:expr_or_block(parser) _ else_case:else_case(parser) {
IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case }
}
rule cond_block(parser: &mut Parser) -> IfExpressionBody =
"{" __ cond_arms:(cond_arm(parser) ++ (delimiter()+)) __ "}" { IfExpressionBody::CondList(cond_arms) }
rule cond_arm(parser: &mut Parser) -> ConditionArm =
_ "else" _ body:expr_or_block(parser) { ConditionArm { condition: Condition::Else, guard: None, body } } /
_ condition:condition(parser) _ guard:condition_guard(parser) _ "then" _ body:expr_or_block(parser)
{ ConditionArm { condition, guard, body } }
rule condition(parser: &mut Parser) -> Condition =
"is" _ pat:pattern(parser) { Condition::Pattern(pat) } /
op:operator() _ expr:expression(parser) { Condition::TruncatedOp(BinOp::from_sigil(op), expr) }
rule condition_guard(parser: &mut Parser) -> Option<Expression> =
("if" _ expr:expression(parser) { expr } )?
rule expr_or_block(parser: &mut Parser) -> Block = block(parser) / pos:position!() ex:expression(parser) {
Statement {
id: parser.fresh() , location: pos.into(),
kind: StatementKind::Expression(ex)
}.into()
}
rule else_case(parser: &mut Parser) -> Option<Block> =
("else" _ eorb:expr_or_block(parser) { eorb })?
rule pattern(parser: &mut Parser) -> Pattern =
"(" _ variants:(pattern(parser) ++ ",") _ ")" { Pattern::TuplePattern(variants) } /
_ pat:simple_pattern(parser) { pat }
rule simple_pattern(parser: &mut Parser) -> Pattern =
pattern_literal() /
qn:qualified_identifier(parser) "(" members:(pattern(parser) ** ",") ")" {
Pattern::TupleStruct(qn, members)
} /
qn:qualified_identifier(parser) _ "{" _ items:(record_pattern_entry(parser) ** ",") "}" _ {
let items = items.into_iter().map(|(name, pat)| (Rc::new(name.to_string()), pat)).collect();
Pattern::Record(qn, items)
} /
qn:qualified_identifier(parser) { Pattern::VarOrName(qn) }
rule record_pattern_entry(parser: &mut Parser) -> (&'input str, Pattern) =
_ name:identifier() _ ":" _ pat:pattern(parser) _ { (name, pat) } /
_ name:identifier() _ {
let qn = QualifiedName {
id: parser.fresh(),
components: vec![Rc::new(name.to_string())],
};
(name, Pattern::VarOrName(qn))
}
rule pattern_literal() -> Pattern =
"true" { Pattern::Literal(PatternLiteral::BoolPattern(true)) } /
"false" { Pattern::Literal(PatternLiteral::BoolPattern(false)) } /
s:bare_string_literal() { Pattern::Literal(PatternLiteral::StringPattern(Rc::new(s.to_string()))) } /
sign:("-"?) num:(float_literal() / nat_literal()) {
let neg = sign.is_some();
Pattern::Literal(PatternLiteral::NumPattern { neg, num })
} /
"_" { Pattern::Ignored }
rule list_expr(parser: &mut Parser) -> ExpressionKind =
"[" exprs:(expression(parser) ** ",") "]" {
let mut exprs = exprs;
ExpressionKind::ListLiteral(exprs)
}
rule paren_expr(parser: &mut Parser) -> ExpressionKind =
"(" exprs:(expression(parser) ** ",") ")" {
let mut exprs = exprs;
match exprs.len() {
1 => exprs.pop().unwrap().kind,
_ => ExpressionKind::TupleLiteral(exprs),
}
}
//TODO need to do something with prefix in the AST
rule string_literal() -> ExpressionKind =
prefix:identifier()? s:bare_string_literal(){ ExpressionKind::StringLiteral(Rc::new(s.to_string())) }
rule bare_string_literal() -> &'input str =
"\"" s:$(string_component()*) "\"" { s }
rule string_component() -> &'input str =
r#"\\"# { "\\" } /
r#"\""# { "\"" } /
r#"\t"# { "\t" } /
r#"\n"# { "\n" } /
ch:$([^ '"' ]) { ch }
rule bool_literal() -> ExpressionKind =
"true" { ExpressionKind::BoolLiteral(true) } / "false" { ExpressionKind::BoolLiteral(false) }
rule nat_literal() -> ExpressionKind =
bin_literal() / hex_literal() / unmarked_literal()
rule unmarked_literal() -> ExpressionKind =
digits:digits() { ExpressionKind::NatLiteral(digits.parse().unwrap()) }
rule bin_literal() -> ExpressionKind =
"0b" digits:bin_digits() {? parse_binary(digits).map(ExpressionKind::NatLiteral) }
rule hex_literal() -> ExpressionKind =
"0x" digits:hex_digits() {? parse_hex(digits).map(ExpressionKind::NatLiteral) }
rule float_literal() -> ExpressionKind =
ds:$( digits() "." digits()? / "." digits() ) { ExpressionKind::FloatLiteral(ds.parse().unwrap()) }
rule digits() -> &'input str = $((digit_group() "_"*)+)
rule bin_digits() -> &'input str = $((bin_digit_group() "_"*)+)
rule hex_digits() -> &'input str = $((hex_digit_group() "_"*)+)
rule digit_group() -> &'input str = $(['0'..='9']+)
rule bin_digit_group() -> &'input str = $(['0' | '1']+)
rule hex_digit_group() -> &'input str = $(['0'..='9' | 'a'..='f' | 'A'..='F']+)
}
}
fn parse_binary(digits: &str) -> Result<u64, &'static str> {
let mut result: u64 = 0;
let mut multiplier = 1;
for d in digits.chars().rev() {
match d {
'1' => result += multiplier,
'0' => (),
'_' => continue,
_ => unreachable!(),
}
multiplier = match multiplier.checked_mul(2) {
Some(m) => m,
None => return Err("Binary expression will overflow"),
}
}
Ok(result)
}
fn parse_hex(digits: &str) -> Result<u64, &'static str> {
let mut result: u64 = 0;
let mut multiplier: u64 = 1;
for d in digits.chars().rev() {
if d == '_' {
continue;
}
match d.to_digit(16) {
Some(n) => result += n as u64 * multiplier,
None => return Err("Internal parser error: invalid hex digit"),
}
multiplier = match multiplier.checked_mul(16) {
Some(m) => m,
None => return Err("Hexadecimal expression will overflow"),
}
}
Ok(result)
}
#[derive(Debug)]
struct BinopSequence {
first: ExpressionKind,
next: Vec<(BinOp, ExpressionKind)>,
}
impl BinopSequence {
fn do_precedence(self, parser: &mut Parser) -> ExpressionKind {
fn helper(
precedence: i32,
lhs: ExpressionKind,
rest: &mut Vec<(BinOp, ExpressionKind)>,
parser: &mut Parser,
) -> Expression {
let mut lhs = Expression::new(parser.fresh(), lhs);
loop {
let (next_op, next_rhs) = match rest.pop() {
Some((a, b)) => (a, b),
None => break,
};
let new_precedence = next_op.get_precedence();
if precedence >= new_precedence {
rest.push((next_op, next_rhs));
break;
}
let rhs = helper(new_precedence, next_rhs, rest, parser);
lhs = Expression::new(
parser.fresh(),
ExpressionKind::BinExp(next_op, Box::new(lhs), Box::new(rhs)),
);
}
lhs
}
let mut as_stack = self.next.into_iter().rev().collect();
helper(BinOp::min_precedence(), self.first, &mut as_stack, parser).kind
}
}

View File

@ -6,8 +6,8 @@ use std::{fmt::Write, rc::Rc};
use pretty_assertions::assert_eq;
use super::{tokenize, ParseResult, Parser};
use crate::{ast::*, tokenizing::Location};
use super::new::{schala_parser, Parser};
use crate::{ast::*, parsing::Location};
fn rc(s: &str) -> Rc<String> {
Rc::new(s.to_owned())
@ -17,18 +17,6 @@ fn bx<T>(item: T) -> Box<T> {
Box::new(item)
}
fn make_parser(input: &str) -> Parser {
let tokens: Vec<crate::tokenizing::Token> = tokenize(input);
let mut parser = super::Parser::new();
parser.add_new_tokens(tokens);
parser
}
fn parse(input: &str) -> ParseResult<AST> {
let mut parser = make_parser(input);
parser.parse()
}
fn stmt(kind: StatementKind) -> Statement {
Statement { location: Location::default(), id: ItemId::default(), kind }
}
@ -99,33 +87,43 @@ fn ty_simple(name: &str) -> TypeIdentifier {
macro_rules! assert_ast {
($input:expr, $statements:expr) => {
let ast = parse($input).unwrap();
let mut parser = Parser::new();
let ast = schala_parser::program($input, &mut parser);
let expected = AST { id: Default::default(), statements: $statements.into() };
println!("Expected: {}", expected);
println!("Actual: {}", ast);
assert_eq!(ast, expected);
if ast.is_err() {
println!("Parse error: {}", ast.unwrap_err());
panic!();
}
assert_eq!(ast.unwrap(), expected);
};
}
macro_rules! assert_fail {
($input:expr, $failure:expr) => {
let err = parse($input).unwrap_err();
assert_eq!(err.msg, $failure);
let mut parser = Parser::new();
let err = schala_parser::program($input, &mut parser).unwrap_err();
assert_eq!(err.to_string(), $failure);
};
}
macro_rules! assert_expr {
($input:expr, $correct:expr) => {
let mut parser = make_parser($input);
assert_eq!(parser.expression().unwrap(), $correct);
let mut parser = Parser::new();
let expr = schala_parser::expression($input, &mut parser);
if expr.is_err() {
println!("Expression parse error: {}", expr.unwrap_err());
panic!();
}
assert_eq!(expr.unwrap(), $correct);
};
}
macro_rules! assert_fail_expr {
($input:expr, $failure:expr) => {
let mut parser = make_parser($input);
let err = parser.expression().unwrap_err();
assert_eq!(err.msg, $failure);
let mut parser = Parser::new();
let _err = schala_parser::expression($input, &mut parser).unwrap_err();
//TODO make real tests for failures
//assert_eq!(err.to_string(), $failure);
};
}
#[test]
@ -141,7 +139,17 @@ fn basic_literals() {
assert_expr!("0xf_f_", expr(NatLiteral(255)));
assert_expr!("false", expr(BoolLiteral(false)));
assert_expr!("true", expr(BoolLiteral(true)));
}
#[test]
fn string_literals() {
use ExpressionKind::*;
assert_expr!(r#""""#, expr(StringLiteral(rc(""))));
assert_expr!(r#""hello""#, expr(StringLiteral(rc("hello"))));
assert_expr!(r#"b"some bytestring""#, expr(StringLiteral(rc("some bytestring"))));
//NOTE I'm not 100% sure this case is correct, but I'll deal with it later
assert_expr!(r#""Do \n \" escapes work\t""#, expr(StringLiteral(rc(r#"Do \n \" escapes work\t"#))));
}
#[test]
@ -150,6 +158,7 @@ fn list_literals() {
assert_expr!("[]", expr(ListLiteral(vec![])));
assert_expr!("[1,2]", expr(ListLiteral(vec![expr(NatLiteral(1)), expr(NatLiteral(2)),])));
assert_fail_expr!("[1,,2]", "some failure");
}
#[test]
@ -158,17 +167,13 @@ fn binexps() {
use StatementKind::Expression;
assert_expr!("0xf_f_+1", binop("+", expr(NatLiteral(255)), expr(NatLiteral(1))));
assert_eq!(
parse("3; 4; 4.3").unwrap(),
AST {
id: Default::default(),
statements: vec![
stmt(Expression(expr(NatLiteral(3)))),
stmt(Expression(expr(NatLiteral(4)))),
stmt(Expression(expr(FloatLiteral(4.3)))),
]
.into()
}
assert_ast!(
"3; 4; 4.3",
vec![
stmt(Expression(expr(NatLiteral(3)))),
stmt(Expression(expr(NatLiteral(4)))),
stmt(Expression(expr(FloatLiteral(4.3)))),
]
);
assert_expr!(
@ -307,12 +312,22 @@ fn named_struct() {
fn index() {
use ExpressionKind::*;
assert_expr!(
"a[b,c]",
"armok[b,c]",
expr(Index {
indexee: bx(expr(Value(qn!(a)))),
indexee: bx(expr(Value(qn!(armok)))),
indexers: vec![expr(Value(qn!(b))), expr(Value(qn!(c)))]
})
);
assert_expr!(
"a[b,c][1]",
expr(Index {
indexee: bx(expr(Index {
indexee: bx(expr(Value(qn!(a)))),
indexers: vec![expr(Value(qn!(b))), expr(Value(qn!(c)))]
})),
indexers: vec![expr(NatLiteral(1))]
})
);
assert_expr!(
"perspicacity()[a]",
expr(Index {
@ -357,7 +372,7 @@ fn for_expression() {
);
assert_expr!(
"for n <- someRange { f(n); }",
"for n <- someRange { f(n) ; }",
expr(ForExpression {
enumerators: vec![Enumerator { id: rc("n"), generator: expr(Value(qn!(someRange))) }],
body: bx(ForBody::StatementBlock(
@ -465,8 +480,9 @@ fn single_param_lambda() {
fn complex_lambdas() {
use ExpressionKind::*;
//TODO support this without the semicolon after the lambda
assert_ast! {
r#"fn wahoo() { let a = 10; \(x) { x + a } };
r#"fn wahoo() { let a = 10; \(x) { x + a }; }
wahoo()(3) "#,
vec![
fn_decl(Signature { name: rc("wahoo"), operator: false, type_anno: None, params: vec![] },
@ -501,7 +517,7 @@ fn complex_lambdas() {
#[test]
fn reserved_words() {
assert_fail!("module::item::call()", "Expected an identifier, got Colon");
assert_fail!("module::item::call()", "error at 1:7: expected ['a' ..= 'z' | 'A' ..= 'Z' | '_']");
}
#[test]
@ -765,7 +781,7 @@ fn functions() {
]
})
);
assert_fail!("a(b,,c)", "Expected a literal expression, got Comma");
assert_fail!("a(b,,c)","error at 1:5: expected one of \"(\", \".\", \"0b\", \"0x\", \"[\", \"\\\"\", \"_\", \"false\", \"for\", \"if\", \"true\", \"while\", ['+' | '-' | '!'], ['0' ..= '9'], ['a' ..= 'z' | 'A' ..= 'Z' | '_'], r#\"\\\"#");
assert_ast!(
"fn a(b, c: Int): Int",
@ -786,6 +802,19 @@ fn functions() {
type_anno: Some(TypeIdentifier::Singleton(TypeSingletonName { name: rc("Int"), params: vec![] })),
})))]
);
let source = r#"
fn some_function() {
}"#;
assert_ast!(
source,
vec![fn_decl(
Signature { name: rc("some_function"), operator: false, type_anno: None, params: vec![] },
vec![].into()
)]
);
}
#[test]
@ -795,7 +824,9 @@ fn max_function_params() {
write!(buf, "a{}, ", n).unwrap();
}
write!(buf, ") {{ return 20 }}").unwrap();
assert_fail!(&buf, "A function cannot have more than 255 arguments");
//assert_fail!(&buf, "A function cannot have more than 255 arguments");
//TODO better errors again
assert_fail!(&buf, "error at 1:1439: expected ['a' ..= 'z' | 'A' ..= 'Z' | '_']");
}
#[test]
@ -883,44 +914,43 @@ fn interface() {
#[test]
fn impls() {
use Declaration::{FuncSig, Impl};
use Declaration::{FuncDecl, Impl};
let block = vec![
FuncDecl(
Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None },
vec![].into(),
),
FuncDecl(
Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None },
vec![].into(),
),
];
assert_ast!(
"impl Heh { fn yolo(); fn swagg(); }",
vec![decl(Impl {
type_name: ty_simple("Heh"),
interface_name: None,
block: vec![
FuncSig(Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None }),
FuncSig(Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None })
]
})]
"impl Heh { fn yolo() { }; fn swagg() { } }",
vec![decl(Impl { type_name: ty_simple("Heh"), interface_name: None, block: block.clone() })]
);
//TODO `"impl Heh<X> { fn yolo() { }; fn swagg() { }; }"` ought to work
assert_ast!(
"impl Heh<X> { fn yolo(); fn swagg(); }",
"impl Heh<X> { fn yolo() { }; fn swagg() { } }",
vec![decl(Impl {
type_name: TypeIdentifier::Singleton(TypeSingletonName {
name: rc("Heh"),
params: vec![ty_simple("X")]
}),
interface_name: None,
block: vec![
FuncSig(Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None }),
FuncSig(Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None })
]
block: block.clone(),
})]
);
assert_ast!(
"impl Heh for Saraz { fn yolo(); fn swagg(); }",
"impl Heh for Saraz { fn yolo() {}; fn swagg() {} }",
vec![decl(Impl {
type_name: ty_simple("Saraz"),
interface_name: Some(TypeSingletonName { name: rc("Heh"), params: vec![] }),
block: vec![
FuncSig(Signature { name: rc("yolo"), operator: false, params: vec![], type_anno: None }),
FuncSig(Signature { name: rc("swagg"), operator: false, params: vec![], type_anno: None })
]
block: block.clone(),
})]
);
@ -1148,7 +1178,7 @@ fn pattern_matching() {
);
assert_expr!(
"if x { is 1 then 5, else 20 }",
"if x { is 1 then 5; else 20 }",
expr(IfExpression {
discriminator: Some(bx(expr(Value(qn!(x))))),
body: bx(IfExpressionBody::CondList(vec![
@ -1184,7 +1214,7 @@ fn pattern_matching() {
assert_expr! {
r#"
if (45, "panda", false, 2.2) {
is (49, "pablo", _, 28.4) then "no"
is (49, "pablo", _, 28.4) then "no"
is (_, "panda", _, -2.2) then "yes"
is _ then "maybe"
}"#,
@ -1263,3 +1293,62 @@ fn flow_control() {
)]
);
}
#[test]
fn blocks() {
use ExpressionKind::*;
let cases = ["{ a }", "{ a; }", "{a}", "{ a\n }", "{ a\n\n }", "{ a;\n\n; }"];
let mut parser = Parser::new();
for case in cases.iter() {
let block = schala_parser::block(case, &mut parser);
assert_eq!(block.unwrap(), vec![exst(Value(qn!(a)))].into());
}
let source = r#"{
fn quah() {
fn foo() { }
}
}"#;
let block = schala_parser::block(source, &mut parser);
assert_eq!(
block.unwrap(),
vec![decl(Declaration::FuncDecl(
Signature { name: rc("quah"), operator: false, params: vec![], type_anno: None },
vec![decl(Declaration::FuncDecl(
Signature { name: rc("foo"), operator: false, params: vec![], type_anno: None },
vec![].into(),
))]
.into()
))]
.into()
);
}
#[test]
fn comments() {
use ExpressionKind::*;
let source = "1 + /* hella /* bro */ */ 2";
assert_expr!(source, binop("+", expr(NatLiteral(1)), expr(NatLiteral(2))));
//TODO make sure this error message makes sense
let source = "1 + /* hella /* bro */ 2";
assert_fail_expr!(source, "foo");
let source = "1 + /* hella */ bro */ 2";
assert_fail_expr!(source, binop("+", expr(NatLiteral(1)), expr(NatLiteral(2))));
let source = "5//no man\n";
assert_ast!(source, vec![exst(NatLiteral(5))]);
}
//TODO support backtick operators like this
/*
#[test]
fn backtick_operators() {
let output = token_kinds("1 `plus` 2");
assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]);
}
*/

View File

@ -247,8 +247,6 @@ impl<'a, 'b> Reducer<'a, 'b> {
let mut alternatives = vec![];
for arm in condition_arms {
match arm.condition {
ast::Condition::Expression(ref _expr) =>
return Expression::ReductionError("case-expression".to_string()),
ast::Condition::Pattern(ref pat) => {
let alt = Alternative {
pattern: match pat.reduce(self.symbol_table) {

View File

@ -5,7 +5,7 @@ use schala_repl::{
use stopwatch::Stopwatch;
use crate::{
error::SchalaError, parsing, reduced_ir, symbol_table, tokenizing, tree_walk_eval, type_inference,
error::SchalaError, parsing, reduced_ir, symbol_table, tree_walk_eval, type_inference,
};
/// All the state necessary to parse and execute a Schala program are stored in this struct.
@ -19,7 +19,7 @@ pub struct Schala<'a> {
/// Contains information for type-checking
type_context: type_inference::TypeContext,
/// Schala Parser
active_parser: parsing::Parser,
active_parser: parsing::new::Parser,
/// Execution state for AST-walking interpreter
eval_state: tree_walk_eval::State<'a>,
@ -47,7 +47,7 @@ impl<'a> Schala<'a> {
source_reference: SourceReference::new(),
symbol_table: symbol_table::SymbolTable::new(),
type_context: type_inference::TypeContext::new(),
active_parser: parsing::Parser::new(),
active_parser: parsing::new::Parser::new(),
eval_state: tree_walk_eval::State::new(),
timings: Vec::new(),
}
@ -74,18 +74,10 @@ impl<'a> Schala<'a> {
self.timings = vec![];
let sw = Stopwatch::start_new();
// 1st stage - tokenization
// TODO tokenize should return its own error type
let tokens = tokenizing::tokenize(source);
if let Some(err) = SchalaError::from_tokens(&tokens) {
return Err(err);
}
//2nd stage - parsing
self.active_parser.add_new_tokens(tokens);
self.source_reference.load_new_source(source);
let ast = self
.active_parser
.parse()
.parse(source)
.map_err(|err| SchalaError::from_parse_error(err, &self.source_reference))?;
self.timings.push(("parsing", sw.elapsed()));
@ -122,31 +114,50 @@ impl<'a> Schala<'a> {
/// Represents lines of source code
pub(crate) struct SourceReference {
lines: Option<Vec<String>>,
last_source: Option<String>,
/// Offsets in *bytes* (not chars) representing a newline character
newline_offsets: Vec<usize>,
}
impl SourceReference {
fn new() -> SourceReference {
SourceReference { lines: None }
pub(crate) fn new() -> SourceReference {
SourceReference { last_source: None, newline_offsets: vec![] }
}
fn load_new_source(&mut self, source: &str) {
//TODO this is a lot of heap allocations - maybe there's a way to make it more efficient?
self.lines = Some(source.lines().map(|s| s.to_string()).collect());
pub(crate) fn load_new_source(&mut self, source: &str) {
self.newline_offsets = vec![];
for (offset, ch) in source.as_bytes().iter().enumerate() {
if *ch == ('\n' as u8) {
self.newline_offsets.push(offset);
}
}
self.last_source = Some(source.to_string());
}
pub fn get_line(&self, line: usize) -> String {
self.lines
.as_ref()
.and_then(|x| x.get(line).map(|s| s.to_string()))
.unwrap_or_else(|| "NO LINE FOUND".to_string())
// (line_start, line_num, the string itself)
pub fn get_line(&self, line: usize) -> (usize, usize, String) {
if self.newline_offsets.is_empty() {
return (0, 0, self.last_source.as_ref().cloned().unwrap());
}
//TODO make sure this is utf8-safe
let start_idx = match self.newline_offsets.binary_search(&line) {
Ok(index) | Err(index) => index,
};
let last_source = self.last_source.as_ref().unwrap();
let start = self.newline_offsets[start_idx];
let end = self.newline_offsets.get(start_idx + 1).cloned().unwrap_or_else(|| last_source.len());
let slice = &last_source.as_bytes()[start..end];
(start, start_idx, std::str::from_utf8(slice).unwrap().to_string())
}
}
#[allow(dead_code)]
#[derive(Clone, Copy, Debug)]
pub(crate) enum Stage {
Tokenizing,
Parsing,
Symbols,
ScopeResolution,
@ -156,7 +167,7 @@ pub(crate) enum Stage {
}
fn stage_names() -> Vec<&'static str> {
vec!["tokenizing", "parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
vec!["parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
}
#[derive(Default, Clone)]
@ -177,7 +188,6 @@ impl<'a> ProgrammingLanguageInterface for Schala<'a> {
fn run_computation(&mut self, request: ComputationRequest<Self::Config>) -> ComputationResponse {
let ComputationRequest { source, debug_requests: _, config: _ } = request;
self.source_reference.load_new_source(source);
let sw = Stopwatch::start_new();
let main_output =

View File

@ -10,7 +10,7 @@ use crate::{
ast,
ast::ItemId,
builtin::Builtin,
tokenizing::Location,
parsing::Location,
type_inference::{TypeContext, TypeId},
};

View File

@ -11,7 +11,7 @@ use crate::{
TypeSingletonName, Variant, VariantKind, AST,
},
builtin::Builtin,
tokenizing::Location,
parsing::Location,
type_inference::{self, PendingType, TypeBuilder, TypeContext, VariantBuilder},
};

View File

@ -2,7 +2,7 @@
use assert_matches::assert_matches;
use super::*;
use crate::{tokenizing::Location, util::quick_ast};
use crate::util::quick_ast;
fn add_symbols(src: &str) -> (SymbolTable, Result<(), Vec<SymbolError>>) {
let ast = quick_ast(src);
@ -79,9 +79,11 @@ fn no_type_definition_duplicates() {
let err = &errs[0];
match err {
SymbolError::DuplicateName { location, prev_name } => {
SymbolError::DuplicateName { location: _, prev_name } => {
assert_eq!(prev_name, &Fqsn::from_strs(&["Food"]));
assert_eq!(location, &Location { line_num: 2, char_num: 2 });
//TODO restore this Location test
//assert_eq!(location, &Location { line_num: 2, char_num: 2 });
}
_ => panic!(),
}
@ -135,7 +137,7 @@ fn dont_falsely_detect_duplicates() {
let a = 40;
77
}
let q = 39;
let q = 39
"#;
let (symbols, _) = add_symbols(source);
@ -171,7 +173,8 @@ fn second_inner_func() {
}
inner_func(x)
}"#;
}
"#;
let (symbols, _) = add_symbols(source);
assert!(symbols.fq_names.table.get(&make_fqsn(&["outer_func"])).is_some());
assert!(symbols.fq_names.table.get(&make_fqsn(&["outer_func", "inner_func"])).is_some());
@ -187,7 +190,8 @@ inner_func(x)
fn enclosing_scopes_3() {
let source = r#"
fn outer_func(x) {
fn inner_func(arg) {
fn inner_func(arg) {
arg
}

View File

@ -1,460 +0,0 @@
#![allow(clippy::upper_case_acronyms)]
use std::{
convert::{TryFrom, TryInto},
fmt,
iter::{Iterator, Peekable},
rc::Rc,
};
use itertools::Itertools;
/// A location in a particular source file. Note that the
/// sizes of the internal unsigned integer types limit
/// the size of a source file to 2^32 lines of
/// at most 2^16 characters, which should be plenty big.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct Location {
pub(crate) line_num: u32,
pub(crate) char_num: u16,
}
impl fmt::Display for Location {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}:{}", self.line_num, self.char_num)
}
}
#[derive(Debug, PartialEq, Clone)]
pub enum TokenKind {
Newline,
Semicolon,
LParen,
RParen,
LSquareBracket,
RSquareBracket,
LAngleBracket,
RAngleBracket,
LCurlyBrace,
RCurlyBrace,
Pipe,
Backslash,
AtSign,
Comma,
Period,
Colon,
Underscore,
Slash,
Equals,
Operator(Rc<String>),
DigitGroup(Rc<String>),
HexLiteral(Rc<String>),
BinNumberSigil,
StrLiteral { s: Rc<String>, prefix: Option<Rc<String>> },
Identifier(Rc<String>),
Keyword(Kw),
EOF,
Error(String),
}
use self::TokenKind::*;
impl fmt::Display for TokenKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
&Operator(ref s) => write!(f, "Operator({})", **s),
&DigitGroup(ref s) => write!(f, "DigitGroup({})", s),
&HexLiteral(ref s) => write!(f, "HexLiteral({})", s),
&StrLiteral { ref s, .. } => write!(f, "StrLiteral({})", s),
&Identifier(ref s) => write!(f, "Identifier({})", s),
&Error(ref s) => write!(f, "Error({})", s),
other => write!(f, "{:?}", other),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Kw {
If,
Then,
Else,
Is,
Func,
For,
While,
Let,
In,
Mut,
Return,
Continue,
Break,
Alias,
Type,
SelfType,
SelfIdent,
Interface,
Impl,
True,
False,
Module,
Import,
}
impl TryFrom<&str> for Kw {
type Error = ();
fn try_from(value: &str) -> Result<Self, Self::Error> {
Ok(match value {
"if" => Kw::If,
"then" => Kw::Then,
"else" => Kw::Else,
"is" => Kw::Is,
"fn" => Kw::Func,
"for" => Kw::For,
"while" => Kw::While,
"let" => Kw::Let,
"in" => Kw::In,
"mut" => Kw::Mut,
"return" => Kw::Return,
"break" => Kw::Break,
"continue" => Kw::Continue,
"alias" => Kw::Alias,
"type" => Kw::Type,
"Self" => Kw::SelfType,
"self" => Kw::SelfIdent,
"interface" => Kw::Interface,
"impl" => Kw::Impl,
"true" => Kw::True,
"false" => Kw::False,
"module" => Kw::Module,
"import" => Kw::Import,
_ => return Err(()),
})
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
pub kind: TokenKind,
pub(crate) location: Location,
}
impl Token {
pub fn to_string_with_metadata(&self) -> String {
format!("{}({})", self.kind, self.location)
}
pub fn get_kind(&self) -> TokenKind {
self.kind.clone()
}
}
const OPERATOR_CHARS: [char; 17] =
['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];
fn is_operator(c: &char) -> bool {
OPERATOR_CHARS.iter().any(|x| x == c)
}
type CharData = (usize, usize, char);
pub fn tokenize(input: &str) -> Vec<Token> {
let mut tokens: Vec<Token> = Vec::new();
let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n"))
.flat_map(|(line_idx, line)| line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch)))
.peekable();
while let Some((line_num, char_num, c)) = input.next() {
let cur_tok_kind = match c {
'/' => match input.peek().map(|t| t.2) {
Some('/') => {
for (_, _, c) in input.by_ref() {
if c == '\n' {
break;
}
}
continue;
}
Some('*') => {
input.next();
let mut comment_level = 1;
while let Some((_, _, c)) = input.next() {
if c == '*' && input.peek().map(|t| t.2) == Some('/') {
input.next();
comment_level -= 1;
} else if c == '/' && input.peek().map(|t| t.2) == Some('*') {
input.next();
comment_level += 1;
}
if comment_level == 0 {
break;
}
}
if comment_level != 0 {
Error("Unclosed comment".to_string())
} else {
continue;
}
}
_ => Slash,
},
c if c.is_whitespace() && c != '\n' => continue,
'\n' => Newline,
';' => Semicolon,
':' => Colon,
',' => Comma,
'(' => LParen,
')' => RParen,
'{' => LCurlyBrace,
'}' => RCurlyBrace,
'[' => LSquareBracket,
']' => RSquareBracket,
'"' => handle_quote(&mut input, None),
'\\' => Backslash,
'@' => AtSign,
c if c.is_digit(10) => handle_digit(c, &mut input),
c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input),
c if is_operator(&c) => handle_operator(c, &mut input),
unknown => Error(format!("Unexpected character: {}", unknown)),
};
let location =
Location { line_num: line_num.try_into().unwrap(), char_num: char_num.try_into().unwrap() };
tokens.push(Token { kind: cur_tok_kind, location });
}
tokens
}
fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
let next_ch = input.peek().map(|&(_, _, c)| c);
if c == '0' && next_ch == Some('x') {
input.next();
let rest: String = input
.peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_')
.map(|(_, _, c)| c)
.collect();
HexLiteral(Rc::new(rest))
} else if c == '0' && next_ch == Some('b') {
input.next();
BinNumberSigil
} else {
let mut buf = c.to_string();
buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| c));
DigitGroup(Rc::new(buf))
}
}
fn handle_quote(
input: &mut Peekable<impl Iterator<Item = CharData>>,
quote_prefix: Option<&str>,
) -> TokenKind {
let mut buf = String::new();
loop {
match input.next().map(|(_, _, c)| c) {
Some('"') => break,
Some('\\') => {
let next = input.peek().map(|&(_, _, c)| c);
if next == Some('n') {
input.next();
buf.push('\n')
} else if next == Some('"') {
input.next();
buf.push('"');
} else if next == Some('t') {
input.next();
buf.push('\t');
}
}
Some(c) => buf.push(c),
None => return TokenKind::Error("Unclosed string".to_string()),
}
}
TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) }
}
fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
let mut buf = String::new();
buf.push(c);
let next_is_alphabetic = input.peek().map(|&(_, _, c)| !c.is_alphabetic()).unwrap_or(true);
if c == '_' && next_is_alphabetic {
return TokenKind::Underscore;
}
loop {
match input.peek().map(|&(_, _, c)| c) {
Some(c) if c == '"' => {
input.next();
return handle_quote(input, Some(&buf));
}
Some(c) if c.is_alphanumeric() || c == '_' => {
input.next();
buf.push(c);
}
_ => break,
}
}
match Kw::try_from(buf.as_str()) {
Ok(kw) => TokenKind::Keyword(kw),
Err(()) => TokenKind::Identifier(Rc::new(buf)),
}
}
fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
match c {
'<' | '>' | '|' | '.' | '=' => {
let next = &input.peek().map(|&(_, _, c)| c);
let next_is_op = next.map(|n| is_operator(&n)).unwrap_or(false);
if !next_is_op {
return match c {
'<' => LAngleBracket,
'>' => RAngleBracket,
'|' => Pipe,
'.' => Period,
'=' => Equals,
_ => unreachable!(),
};
}
}
_ => (),
};
let mut buf = String::new();
if c == '`' {
loop {
match input.peek().map(|&(_, _, c)| c) {
Some(c) if c.is_alphabetic() || c == '_' => {
input.next();
buf.push(c);
}
Some('`') => {
input.next();
break;
}
_ => break,
}
}
} else {
buf.push(c);
loop {
match input.peek().map(|&(_, _, c)| c) {
Some(c) if is_operator(&c) => {
input.next();
buf.push(c);
}
_ => break,
}
}
}
TokenKind::Operator(Rc::new(buf))
}
#[cfg(test)]
mod schala_tokenizer_tests {
use super::{Kw::*, *};
macro_rules! digit {
($ident:expr) => {
DigitGroup(Rc::new($ident.to_string()))
};
}
macro_rules! ident {
($ident:expr) => {
Identifier(Rc::new($ident.to_string()))
};
}
macro_rules! op {
($ident:expr) => {
Operator(Rc::new($ident.to_string()))
};
}
fn token_kinds(input: &str) -> Vec<TokenKind> {
tokenize(input).into_iter().map(move |tok| tok.kind).collect()
}
#[test]
fn tokens() {
let output = token_kinds("let a: A<B> = c ++ d");
assert_eq!(
output,
vec![
Keyword(Let),
ident!("a"),
Colon,
ident!("A"),
LAngleBracket,
ident!("B"),
RAngleBracket,
Equals,
ident!("c"),
op!("++"),
ident!("d")
]
);
}
#[test]
fn underscores() {
let output = token_kinds("4_8");
assert_eq!(output, vec![digit!("4"), Underscore, digit!("8")]);
let output = token_kinds("aba_yo");
assert_eq!(output, vec![ident!("aba_yo")]);
}
#[test]
fn comments() {
let output = token_kinds("1 + /* hella /* bro */ */ 2");
assert_eq!(output, vec![digit!("1"), op!("+"), digit!("2")]);
let output = token_kinds("1 + /* hella /* bro */ 2");
assert_eq!(output, vec![digit!("1"), op!("+"), Error("Unclosed comment".to_string())]);
//TODO not sure if I want this behavior
let output = token_kinds("1 + /* hella */ bro */ 2");
assert_eq!(
output,
vec![
digit!("1"),
op!("+"),
Identifier(Rc::new("bro".to_string())),
Operator(Rc::new("*".to_string())),
Slash,
DigitGroup(Rc::new("2".to_string()))
]
);
}
#[test]
fn backtick_operators() {
let output = token_kinds("1 `plus` 2");
assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]);
}
#[test]
fn string_literals() {
let output = token_kinds(r#""some string""#);
assert_eq!(output, vec![StrLiteral { s: Rc::new("some string".to_string()), prefix: None }]);
let output = token_kinds(r#"b"some bytestring""#);
assert_eq!(
output,
vec![StrLiteral {
s: Rc::new("some bytestring".to_string()),
prefix: Some(Rc::new("b".to_string()))
}]
);
let output = token_kinds(r#""Do \n \" escapes work\t""#);
assert_eq!(
output,
vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }]
);
}
}

View File

@ -43,7 +43,7 @@ fn test_basic_eval() {
#[test]
fn op_eval() {
eval_assert("- 13", "-13");
eval_assert("-13", "-13");
eval_assert("10 - 2", "8");
}
@ -96,7 +96,7 @@ trad()"#,
);
let err =
"No symbol found for name: QualifiedName { id: Id { idx: 4, t: PhantomData }, components: [\"a\"] }";
"No symbol found for name: QualifiedName { id: Id { idx: 9, t: PhantomData }, components: [\"a\"] }";
eval_assert_failure(
r#"
@ -271,26 +271,26 @@ fn full_if_matching() {
let source = r#"
type Option<T> = Some(T) | None
let a = Option::None
if a { is Option::None then 4, is Option::Some(x) then x }
if a { is Option::None then 4; is Option::Some(x) then x }
"#;
eval_assert(source, "4");
let source = r#"
type Option<T> = Some(T) | None
let sara = Option::Some(99)
if sara { is Option::None then 1 + 3, is Option::Some(x) then x }
if sara { is Option::None then 1 + 3; is Option::Some(x) then x }
"#;
eval_assert(source, "99");
let source = r#"
let a = 10
if a { is 10 then "x", is 4 then "y" }
if a { is 10 then "x"; is 4 then "y" }
"#;
eval_assert(source, "\"x\"");
let source = r#"
let a = 10
if a { is 15 then "x", is 10 then "y" }
if a { is 15 then "x"; is 10 then "y" }
"#;
eval_assert(source, "\"y\"");
}
@ -300,7 +300,7 @@ if a { is 15 then "x", is 10 then "y" }
fn string_pattern() {
let source = r#"
let a = "foo"
if a { is "foo" then "x", is _ then "y" }
if a { is "foo" then "x"; is _ then "y" }
"#;
eval_assert(source, "\"x\"");
}
@ -310,7 +310,7 @@ fn boolean_pattern() {
let source = r#"
let a = true
if a {
is true then "x",
is true then "x"
is false then "y"
}
"#;
@ -321,7 +321,7 @@ if a {
fn boolean_pattern_2() {
let source = r#"
let a = false
if a { is true then "x", is false then "y" }
if a { is true then "x"; is false then "y" }
"#;
eval_assert(source, "\"y\"");
}
@ -341,7 +341,7 @@ if Option::Some(10) {
fn tuple_pattern() {
let source = r#"
if (1, 2) {
is (1, x) then x,
is (1, x) then x;
is _ then 99
}
"#;
@ -352,7 +352,7 @@ if (1, 2) {
fn tuple_pattern_2() {
let source = r#"
if (1, 2) {
is (10, x) then x,
is (10, x) then x
is (y, x) then x + y
}
"#;
@ -363,7 +363,7 @@ if (1, 2) {
fn tuple_pattern_3() {
let source = r#"
if (1, 5) {
is (10, x) then x,
is (10, x) then x
is (1, x) then x
}
"#;
@ -374,8 +374,8 @@ if (1, 5) {
fn tuple_pattern_4() {
let source = r#"
if (1, 5) {
is (10, x) then x,
is (1, x) then x,
is (10, x) then x
is (1, x) then x
}
"#;
eval_assert(source, "5");
@ -390,21 +390,21 @@ let b = Stuff::Jugs(1, "haha")
let c = Stuff::Mardok
let x = if a {
is Stuff::Mulch(20) then "x",
is Stuff::Mulch(20) then "x"
is _ then "ERR"
}
let y = if b {
is Stuff::Mulch(n) then "ERR",
is Stuff::Jugs(2, _) then "ERR",
is Stuff::Jugs(1, s) then s,
is _ then "ERR",
is Stuff::Mulch(n) then "ERR"
is Stuff::Jugs(2, _) then "ERR"
is Stuff::Jugs(1, s) then s
is _ then "ERR"
}
let z = if c {
is Stuff::Jugs(_, _) then "ERR",
is Stuff::Mardok then "NIGH",
is _ then "ERR",
is Stuff::Jugs(_, _) then "ERR"
is Stuff::Mardok then "NIGH"
is _ then "ERR"
}
(x, y, z)

View File

@ -52,10 +52,8 @@ where T: Hash + Eq
/// Quickly create an AST from a string, with no error checking. For test use only
#[cfg(test)]
pub fn quick_ast(input: &str) -> crate::ast::AST {
let tokens = crate::tokenizing::tokenize(input);
let mut parser = crate::parsing::Parser::new();
parser.add_new_tokens(tokens);
let output = parser.parse();
let mut parser = crate::parsing::new::Parser::new();
let output = parser.parse(input);
output.unwrap()
}

View File

@ -77,7 +77,7 @@ x is Some(t) // type bool
if x {
is Some(t) => {
},
}
is None => {
}