From 94ee3e18970b46906682cd1e241d16ab7f6fafcb Mon Sep 17 00:00:00 2001 From: Greg Shuflin Date: Sun, 14 Nov 2021 03:18:05 -0800 Subject: [PATCH] Delete a bunch of now-obsolete parsing/tokenizing code --- schala-lang/language/src/ast/mod.rs | 2 +- schala-lang/language/src/ast/operators.rs | 30 - schala-lang/language/src/error.rs | 23 +- schala-lang/language/src/lib.rs | 2 - schala-lang/language/src/parsing/mod.rs | 1262 +---------------- schala-lang/language/src/parsing/new.rs | 5 - schala-lang/language/src/parsing/test.rs | 2 +- schala-lang/language/src/schala.rs | 5 +- schala-lang/language/src/symbol_table/mod.rs | 2 +- .../language/src/symbol_table/populator.rs | 2 +- schala-lang/language/src/symbol_table/test.rs | 4 +- schala-lang/language/src/tokenizing.rs | 464 ------ 12 files changed, 21 insertions(+), 1782 deletions(-) delete mode 100644 schala-lang/language/src/tokenizing.rs diff --git a/schala-lang/language/src/ast/mod.rs b/schala-lang/language/src/ast/mod.rs index 2d9495e..40ff36a 100644 --- a/schala-lang/language/src/ast/mod.rs +++ b/schala-lang/language/src/ast/mod.rs @@ -17,7 +17,7 @@ pub use visitor::*; use crate::{ derivative::Derivative, identifier::{define_id_kind, Id}, - tokenizing::Location, + parsing::Location, }; define_id_kind!(ASTItem); diff --git a/schala-lang/language/src/ast/operators.rs b/schala-lang/language/src/ast/operators.rs index 1bdc015..06e6ed1 100644 --- a/schala-lang/language/src/ast/operators.rs +++ b/schala-lang/language/src/ast/operators.rs @@ -1,7 +1,5 @@ use std::rc::Rc; -use crate::tokenizing::TokenKind; - #[derive(Debug, PartialEq, Clone)] pub struct PrefixOp { sigil: Rc, @@ -15,10 +13,6 @@ impl PrefixOp { pub fn sigil(&self) -> &str { &self.sigil } - - pub fn is_prefix(op: &str) -> bool { - matches!(op, "+" | "-" | "!") - } } #[derive(Debug, PartialEq, Clone)] @@ -35,38 +29,14 @@ impl BinOp { &self.sigil } - pub fn from_sigil_token(tok: &TokenKind) -> Option { - let s = token_kind_to_sigil(tok)?; - Some(BinOp::from_sigil(s)) - } - pub fn min_precedence() -> i32 { i32::min_value() } - pub fn get_precedence_from_token(op_tok: &TokenKind) -> Option { - let s = token_kind_to_sigil(op_tok)?; - Some(binop_precedences(s)) - } - pub fn get_precedence(&self) -> i32 { binop_precedences(self.sigil.as_ref()) } } -fn token_kind_to_sigil(tok: &TokenKind) -> Option<&str> { - use self::TokenKind::*; - Some(match tok { - Operator(op) => op.as_str(), - Period => ".", - Pipe => "|", - Slash => "/", - LAngleBracket => "<", - RAngleBracket => ">", - Equals => "=", - _ => return None, - }) -} - fn binop_precedences(s: &str) -> i32 { let default = 10_000_000; match s { diff --git a/schala-lang/language/src/error.rs b/schala-lang/language/src/error.rs index 78a9afc..795aeec 100644 --- a/schala-lang/language/src/error.rs +++ b/schala-lang/language/src/error.rs @@ -1,8 +1,7 @@ use crate::{ - parsing::ParseError, + parsing::{Location, ParseError}, schala::{SourceReference, Stage}, symbol_table::SymbolError, - tokenizing::{Location, Token, TokenKind}, type_inference::TypeError, }; @@ -52,26 +51,6 @@ impl SchalaError { errors: vec![], } } - - pub(crate) fn from_tokens(tokens: &[Token]) -> Option { - let token_errors: Vec = tokens - .iter() - .filter_map(|tok| match tok.kind { - TokenKind::Error(ref err) => Some(Error { - location: Some(tok.location), - text: Some(err.clone()), - stage: Stage::Tokenizing, - }), - _ => None, - }) - .collect(); - - if token_errors.is_empty() { - None - } else { - Some(SchalaError { errors: token_errors, formatted_parse_error: None }) - } - } } #[allow(dead_code)] diff --git a/schala-lang/language/src/lib.rs b/schala-lang/language/src/lib.rs index d024369..3ac261d 100644 --- a/schala-lang/language/src/lib.rs +++ b/schala-lang/language/src/lib.rs @@ -7,7 +7,6 @@ //! `ProgrammingLanguageInterface` and the chain of compiler passes for it. extern crate schala_repl; -#[macro_use] extern crate schala_lang_codegen; extern crate derivative; @@ -19,7 +18,6 @@ mod type_inference; mod ast; mod parsing; -mod tokenizing; #[macro_use] mod symbol_table; mod builtin; diff --git a/schala-lang/language/src/parsing/mod.rs b/schala-lang/language/src/parsing/mod.rs index e39915e..485519a 100644 --- a/schala-lang/language/src/parsing/mod.rs +++ b/schala-lang/language/src/parsing/mod.rs @@ -171,1266 +171,28 @@ pub mod new; mod test; -use std::rc::Rc; - -use crate::{ - ast::*, - identifier::IdStore, - tokenizing::{Kw::*, Location, TokenKind::*, *}, -}; +use std::fmt; /// Represents a parsing error #[derive(Debug)] pub struct ParseError { pub msg: String, pub location: Location, - pub token: Token, } -impl ParseError { - fn new_with_token(msg: M, token: Token) -> ParseResult - where M: Into { - Err(ParseError { msg: msg.into(), location: Default::default(), token }) +#[derive(Debug, Clone, Copy, PartialEq, Default)] +pub struct Location { + pub(crate) offset: usize, +} + +impl From for Location { + fn from(offset: usize) -> Self { + Self { offset } } } -/// Represents either a successful parsing result or a ParseError -pub type ParseResult = Result; - -#[derive(Debug)] -pub struct ParseRecord { - production_name: String, - next_token: String, - level: u32, -} - -/// Main data structure for doing parsing. -pub struct Parser { - token_handler: TokenHandler, - parse_record: Vec, - parse_level: u32, - restrictions: ParserRestrictions, - id_store: IdStore, -} - -struct ParserRestrictions { - no_struct_literal: bool, -} - -struct TokenHandler { - tokens: Vec, - idx: usize, - end_of_file: Location, -} - -impl TokenHandler { - fn new(tokens: Vec) -> TokenHandler { - let end_of_file = match tokens.last() { - None => Location { offset: 0 }, - Some(t) => t.location, - }; - TokenHandler { idx: 0, tokens, end_of_file } - } - - fn peek_kind(&mut self) -> TokenKind { - self.peek().kind - } - - fn peek_kind_n(&mut self, n: usize) -> TokenKind { - self.peek_n(n).kind - } - fn peek(&mut self) -> Token { - self.tokens - .get(self.idx) - .cloned() - .unwrap_or(Token { kind: TokenKind::EOF, location: self.end_of_file }) - } - /// calling peek_n(0) is the same thing as peek() - fn peek_n(&mut self, n: usize) -> Token { - self.tokens - .get(self.idx + n) - .cloned() - .unwrap_or(Token { kind: TokenKind::EOF, location: self.end_of_file }) - } - fn next(&mut self) -> Token { - self.idx += 1; - self.tokens - .get(self.idx - 1) - .cloned() - .unwrap_or(Token { kind: TokenKind::EOF, location: self.end_of_file }) +impl fmt::Display for Location { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.offset) } } - -impl Parser { - /// Create a new parser initialized with some tokens. - pub fn new() -> Parser { - Parser { - token_handler: TokenHandler::new(vec![]), - parse_record: vec![], - parse_level: 0, - restrictions: ParserRestrictions { no_struct_literal: false }, - id_store: IdStore::new(), - } - } - - pub fn add_new_tokens(&mut self, new_tokens: Vec) { - self.token_handler = TokenHandler::new(new_tokens); - } - - /// Parse all loaded tokens up to this point. - pub fn parse(&mut self) -> ParseResult { - self.program() - } - - #[allow(dead_code)] - pub fn format_parse_trace(&self) -> String { - let mut buf = String::new(); - buf.push_str("Parse productions:\n"); - let mut next_token = None; - for r in self.parse_record.iter() { - let mut indent = String::new(); - for _ in 0..r.level { - indent.push('.'); - } - let effective_token = if next_token == Some(&r.next_token) { - "".to_string() - } else { - next_token = Some(&r.next_token); - format!(", next token: {}", r.next_token) - }; - buf.push_str(&format!("{}`{}`{}\n", indent, "XXX", effective_token)); - } - buf - } -} - -macro_rules! print_token_pattern { - ($tokenpattern:pat) => { - stringify!($tokenpattern) - }; -} - -macro_rules! expect { - ($self:expr, $token_kind:pat) => { expect!($self, $token_kind if true) }; - ($self:expr, $expected_kind:pat if $cond:expr) => { - { - let tok = $self.token_handler.peek(); - match tok.get_kind() { - $expected_kind if $cond => $self.token_handler.next(), - actual_kind => { - let msg = format!("Expected {}, got {:?}", print_token_pattern!($expected_kind), actual_kind); - return ParseError::new_with_token(msg, tok); - } - } - } - } -} - -macro_rules! delimited { - ($self:expr, $start:pat, $parse_fn:ident, $delim:pat, $end:pat, nonstrict) => { - delimited!($self, $start, $parse_fn, $delim, $end, false) - }; - ($self:expr, $start:pat, $parse_fn:ident, $delim:pat, $end:pat) => { - delimited!($self, $start, $parse_fn, $delim, $end, true) - }; - ($self:expr, $start:pat, $parse_fn:ident, $delim:pat, $end:pat, $strictness:expr) => {{ - expect!($self, $start); - let mut acc = vec![]; - loop { - let peek = $self.token_handler.peek(); - match peek.get_kind() { - $end | EOF => break, - Newline | Semicolon => { - $self.token_handler.next(); - continue; - } - _ => (), - } - if !$strictness { - match peek.get_kind() { - $delim => { - $self.token_handler.next(); - continue; - } - _ => (), - } - } - acc.push($self.$parse_fn()?); - match $self.token_handler.peek().get_kind() { - $delim => { - $self.token_handler.next(); - continue; - } - _ if $strictness => break, - _ => continue, - }; - } - expect!($self, $end); - acc - }}; -} - -impl Parser { - /// `program := (statement delimiter)* EOF` - /// `delimiter := NEWLINE | ';'` - #[recursive_descent_method] - fn program(&mut self) -> ParseResult { - let mut statements = Vec::new(); - loop { - match self.token_handler.peek().get_kind() { - EOF => break, - Newline | Semicolon => { - self.token_handler.next(); - continue; - } - _ => statements.push(self.statement()?), - } - } - Ok(AST { id: self.id_store.fresh(), statements: statements.into() }) - } - - /// `statement := expression | declaration` - #[recursive_descent_method] - fn statement(&mut self) -> ParseResult { - //TODO handle error recovery here - let tok = self.token_handler.peek(); - let kind = match tok.get_kind() { - AtSign | Keyword(Let) | Keyword(Type) | Keyword(Func) | Keyword(Interface) | Keyword(Impl) - | Keyword(Module) => self.declaration().map(StatementKind::Declaration), - Keyword(Continue) | Keyword(Return) | Keyword(Break) => - self.flow_control().map(StatementKind::Flow), - Keyword(Import) => self.import_declaration().map(StatementKind::Import), - _ => self.expression().map(StatementKind::Expression), - }?; - let id = self.id_store.fresh(); - Ok(Statement { kind, id, location: tok.location }) - } - - fn declaration(&mut self) -> ParseResult { - match self.token_handler.peek_kind() { - AtSign => self.annotation(), - Keyword(Let) => self.binding_declaration(), - Keyword(Type) => self.type_declaration(), - Keyword(Func) => self.func_declaration(), - Keyword(Interface) => self.interface_declaration(), - Keyword(Impl) => self.impl_declaration(), - Keyword(Module) => self.module_declaration(), - _ => return ParseError::new_with_token("Bad parse state encountered", self.token_handler.peek()), - } - } - - #[recursive_descent_method] - fn flow_control(&mut self) -> ParseResult { - let tok = self.token_handler.next(); - Ok(match tok.get_kind() { - Keyword(Continue) => FlowControl::Continue, - Keyword(Break) => FlowControl::Break, - Keyword(Return) => match self.token_handler.peek_kind() { - Semicolon | Newline => FlowControl::Return(None), - _ => FlowControl::Return(Some(self.expression()?)), - }, - _ => unreachable!(), - }) - } - - //TODO make it possible to annotate other types of things - #[recursive_descent_method] - fn annotation(&mut self) -> ParseResult { - expect!(self, AtSign); - let name = self.identifier()?; - let arguments = if let LParen = self.token_handler.peek().get_kind() { - delimited!(self, LParen, expression, Comma, RParen) - } else { - vec![] - }; - if let Semicolon | Newline = self.token_handler.peek_kind() { - self.token_handler.next(); - } - let inner = Box::new(self.statement()?); - Ok(Declaration::Annotation { name, arguments, inner }) - } - - #[recursive_descent_method] - fn type_declaration(&mut self) -> ParseResult { - expect!(self, Keyword(Type)); - self.type_declaration_body() - } - - #[recursive_descent_method] - fn type_declaration_body(&mut self) -> ParseResult { - if let Keyword(Alias) = self.token_handler.peek_kind() { - self.type_alias() - } else { - let mutable = if let Keyword(Mut) = self.token_handler.peek_kind() { - self.token_handler.next(); - true - } else { - false - }; - let name = self.type_singleton_name()?; - expect!(self, Equals); - let body = self.type_body()?; - Ok(Declaration::TypeDecl { name, body, mutable }) - } - } - - #[recursive_descent_method] - fn type_alias(&mut self) -> ParseResult { - expect!(self, Keyword(Alias)); - let alias = self.identifier()?; - expect!(self, Equals); - let original = self.identifier()?; - Ok(Declaration::TypeAlias { alias, original }) - } - - #[recursive_descent_method] - fn type_body(&mut self) -> ParseResult { - Ok(if let LCurlyBrace = self.token_handler.peek_kind() { - let typed_identifier_list = delimited!(self, LCurlyBrace, typed_identifier, Comma, RCurlyBrace); - TypeBody::ImmediateRecord(self.id_store.fresh(), typed_identifier_list) - } else { - let mut variants = vec![self.variant_specifier()?]; - while let Pipe = self.token_handler.peek_kind() { - self.token_handler.next(); - variants.push(self.variant_specifier()?); - } - TypeBody::Variants(variants) - }) - } - - #[recursive_descent_method] - fn variant_specifier(&mut self) -> ParseResult { - let name = self.identifier()?; - let kind = match self.token_handler.peek_kind() { - LParen => { - let tuple_members = delimited!(self, LParen, type_name, Comma, RParen); - VariantKind::TupleStruct(tuple_members) - } - LCurlyBrace => { - let typed_identifier_list = - delimited!(self, LCurlyBrace, typed_identifier, Comma, RCurlyBrace); - VariantKind::Record(typed_identifier_list) - } - _ => VariantKind::UnitStruct, - }; - Ok(Variant { id: self.id_store.fresh(), name, kind }) - } - - #[recursive_descent_method] - fn typed_identifier(&mut self) -> ParseResult<(Rc, TypeIdentifier)> { - let identifier = self.identifier()?; - expect!(self, Colon); - let type_name = self.type_name()?; - Ok((identifier, type_name)) - } - - #[recursive_descent_method] - fn func_declaration(&mut self) -> ParseResult { - let signature = self.func_signature()?; - if let LCurlyBrace = self.token_handler.peek_kind() { - let statements = self.nonempty_func_body()?.into(); - Ok(Declaration::FuncDecl(signature, statements)) - } else { - Ok(Declaration::FuncSig(signature)) - } - } - - #[recursive_descent_method] - fn func_signature(&mut self) -> ParseResult { - expect!(self, Keyword(Func)); - let (name, operator) = match self.token_handler.peek_kind() { - Operator(s) => { - let name = s; - self.token_handler.next(); - (name, true) - } - _ => (self.identifier()?, false), - }; - let params = self.formal_param_list()?; - let type_anno = match self.token_handler.peek_kind() { - Colon => Some(self.type_anno()?), - _ => None, - }; - Ok(Signature { name, operator, params, type_anno }) - } - - #[recursive_descent_method] - fn nonempty_func_body(&mut self) -> ParseResult> { - let statements = - delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict); - Ok(statements) - } - - #[recursive_descent_method] - fn formal_param_list(&mut self) -> ParseResult> { - let tok = self.token_handler.peek(); - let list = delimited!(self, LParen, formal_param, Comma, RParen); - if list.len() > 255 { - ParseError::new_with_token("A function cannot have more than 255 arguments", tok.clone()) - } else { - Ok(list) - } - } - - #[recursive_descent_method] - fn formal_param(&mut self) -> ParseResult { - let name = self.identifier()?; - let anno = match self.token_handler.peek_kind() { - Colon => Some(self.type_anno()?), - _ => None, - }; - let default = match self.token_handler.peek_kind() { - Equals => { - self.token_handler.next(); - Some(self.expression()?) - } - _ => None, - }; - Ok(FormalParam { name, anno, default }) - } - - #[recursive_descent_method] - fn binding_declaration(&mut self) -> ParseResult { - expect!(self, Keyword(Kw::Let)); - let constant = match self.token_handler.peek_kind() { - Keyword(Kw::Mut) => { - self.token_handler.next(); - false - } - _ => true, - }; - let name = self.identifier()?; - let type_anno = - if let Colon = self.token_handler.peek_kind() { Some(self.type_anno()?) } else { None }; - - expect!(self, Equals); - let expr = self.expression()?; - - Ok(Declaration::Binding { name, constant, type_anno, expr }) - } - - #[recursive_descent_method] - fn interface_declaration(&mut self) -> ParseResult { - expect!(self, Keyword(Interface)); - let name = self.identifier()?; - let signatures = self.signature_block()?; - Ok(Declaration::Interface { name, signatures }) - } - - #[recursive_descent_method] - fn signature_block(&mut self) -> ParseResult> { - Ok(delimited!(self, LCurlyBrace, func_signature, Newline | Semicolon, RCurlyBrace, nonstrict)) - } - - #[recursive_descent_method] - fn impl_declaration(&mut self) -> ParseResult { - expect!(self, Keyword(Impl)); - let first = self.type_singleton_name()?; - let second = if let Keyword(For) = self.token_handler.peek_kind() { - self.token_handler.next(); - Some(self.type_name()?) - } else { - None - }; - - let block = self.decl_block()?; - Ok(match (first, second) { - (interface_name, Some(type_name)) => - Declaration::Impl { type_name, interface_name: Some(interface_name), block }, - (type_singleton_name, None) => Declaration::Impl { - type_name: TypeIdentifier::Singleton(type_singleton_name), - interface_name: None, - block, - }, - }) - } - - #[recursive_descent_method] - fn decl_block(&mut self) -> ParseResult> { - Ok(delimited!(self, LCurlyBrace, func_declaration, Newline | Semicolon, RCurlyBrace, nonstrict)) - } - - #[recursive_descent_method] - fn expression(&mut self) -> ParseResult { - let mut expr_body = self.precedence_expr(BinOp::min_precedence())?; - let type_anno = match self.token_handler.peek_kind() { - Colon => Some(self.type_anno()?), - _ => None, - }; - if expr_body.type_anno.is_some() { - return ParseError::new_with_token("Bad parse state encountered", self.token_handler.peek()); - } - expr_body.type_anno = type_anno; - Ok(expr_body) - } - - #[recursive_descent_method] - fn type_anno(&mut self) -> ParseResult { - expect!(self, Colon); - self.type_name() - } - - #[recursive_descent_method] - fn type_name(&mut self) -> ParseResult { - use self::TypeIdentifier::*; - Ok(match self.token_handler.peek_kind() { - LParen => Tuple(delimited!(self, LParen, type_name, Comma, RParen)), - _ => Singleton(self.type_singleton_name()?), - }) - } - - #[recursive_descent_method] - fn type_singleton_name(&mut self) -> ParseResult { - Ok(TypeSingletonName { - name: self.identifier()?, - params: match self.token_handler.peek_kind() { - LAngleBracket => delimited!(self, LAngleBracket, type_name, Comma, RAngleBracket), - _ => vec![], - }, - }) - } - - // this implements Pratt parsing, see http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ - #[allow(clippy::while_let_loop)] - fn precedence_expr(&mut self, precedence: i32) -> ParseResult { - let record = ParseRecord { - production_name: "precedence_expr".to_string(), - next_token: self.token_handler.peek().to_string_with_metadata(), - level: self.parse_level, - }; - self.parse_level += 1; - self.parse_record.push(record); - - let mut lhs = self.prefix_expr()?; - loop { - let new_precedence = match BinOp::get_precedence_from_token(&self.token_handler.peek_kind()) { - Some(p) => p, - None => break, - }; - - if precedence >= new_precedence { - break; - } - let next_tok = self.token_handler.next(); - let operation = match BinOp::from_sigil_token(&next_tok.kind) { - Some(sigil) => sigil, - //TODO I think I can fix this unreachable - None => unreachable!(), - }; - let rhs = self.precedence_expr(new_precedence)?; - lhs = Expression::new( - self.id_store.fresh(), - ExpressionKind::BinExp(operation, Box::new(lhs), Box::new(rhs)), - ); - } - self.parse_level -= 1; - Ok(lhs) - } - - #[recursive_descent_method] - fn prefix_expr(&mut self) -> ParseResult { - while let Semicolon | Newline = self.token_handler.peek_kind() { - self.token_handler.next(); - } - - match self.token_handler.peek_kind() { - Operator(ref op) if PrefixOp::is_prefix(&*op) => { - let sigil = match self.token_handler.next().kind { - Operator(op) => op, - _ => unreachable!(), - }; - let expr = self.primary()?; - let prefix_op = PrefixOp::from_sigil(sigil.as_str()); - Ok(Expression::new( - self.id_store.fresh(), - ExpressionKind::PrefixExp(prefix_op, Box::new(expr)), - )) - } - _ => self.extended_expr(), - } - } - - #[recursive_descent_method] - fn extended_expr(&mut self) -> ParseResult { - let mut expression = self.primary()?; - loop { - //TODO need a next non whitespace - let next = self.token_handler.peek_kind(); - match next { - Period => { - self.token_handler.next(); - let name = self.identifier()?; - expression = Expression::new( - self.id_store.fresh(), - ExpressionKind::Access { name, expr: Box::new(expression) }, - ); - } - LSquareBracket => { - let indexers = delimited!(self, LSquareBracket, expression, Comma, RSquareBracket); - if indexers.is_empty() { - return ParseError::new_with_token( - "Empty index expressions are not allowed", - self.token_handler.peek(), - ); - } - expression = Expression::new( - self.id_store.fresh(), - ExpressionKind::Index { indexee: Box::new(expression), indexers }, - ); - } - LParen => { - let arguments = delimited!(self, LParen, invocation_argument, Comma, RParen); - expression = Expression::new( - self.id_store.fresh(), - ExpressionKind::Call { f: Box::new(expression), arguments }, - ); - } - _ => break, - } - } - Ok(expression) - } - - #[recursive_descent_method] - fn invocation_argument(&mut self) -> ParseResult { - Ok(match self.token_handler.peek_kind() { - Underscore => { - self.token_handler.next(); - InvocationArgument::Ignored - } - Identifier(s) => match self.token_handler.peek_kind_n(1) { - Equals => { - self.token_handler.next(); - self.token_handler.next(); - let expr = self.expression()?; - InvocationArgument::Keyword { name: s, expr } - } - _ => { - let expr = self.expression()?; - InvocationArgument::Positional(expr) - } - }, - _ => InvocationArgument::Positional(self.expression()?), - }) - } - - #[recursive_descent_method] - fn index_expr(&mut self) -> ParseResult { - let primary = self.primary()?; - Ok(if let LSquareBracket = self.token_handler.peek_kind() { - let indexers = delimited!(self, LSquareBracket, expression, Comma, RSquareBracket); - if indexers.is_empty() { - return ParseError::new_with_token( - "Empty index expressions are not allowed", - self.token_handler.peek(), - ); - } - Expression::new( - self.id_store.fresh(), - ExpressionKind::Index { - indexee: Box::new(Expression::new(self.id_store.fresh(), primary.kind)), - indexers, - }, - ) - } else { - primary - }) - } - - #[recursive_descent_method] - fn primary(&mut self) -> ParseResult { - match self.token_handler.peek_kind() { - LCurlyBrace => self.curly_brace_expr(), - Backslash => self.lambda_expr(), - LParen => self.paren_expr(), - LSquareBracket => self.list_expr(), - Keyword(Kw::If) => self.if_expr(), - Keyword(Kw::For) => self.for_expr(), - Keyword(Kw::While) => self.while_expr(), - Identifier(_) => self.identifier_expr(), - _ => self.literal(), - } - } - - #[recursive_descent_method] - fn list_expr(&mut self) -> ParseResult { - let exprs = delimited!(self, LSquareBracket, expression, Comma, RSquareBracket); - Ok(Expression::new(self.id_store.fresh(), ExpressionKind::ListLiteral(exprs))) - } - - #[recursive_descent_method] - fn curly_brace_expr(&mut self) -> ParseResult { - ParseError::new_with_token("Not implemented", self.token_handler.peek()) - } - - #[recursive_descent_method] - fn lambda_expr(&mut self) -> ParseResult { - expect!(self, Backslash); - let params = self.lambda_param_list()?; - let type_anno = match self.token_handler.peek_kind() { - Colon => Some(self.type_anno()?), - _ => None, - }; - let body = self.nonempty_func_body()?.into(); - Ok(Expression::new(self.id_store.fresh(), ExpressionKind::Lambda { params, type_anno, body })) - //TODO need to handle types somehow - } - - #[recursive_descent_method] - fn lambda_param_list(&mut self) -> ParseResult> { - if let LParen = self.token_handler.peek_kind() { - self.formal_param_list() - } else { - let single_param = self.formal_param()?; - Ok(vec![single_param]) - } - } - - #[recursive_descent_method] - fn paren_expr(&mut self) -> ParseResult { - use self::ExpressionKind::*; - let old_struct_value = self.restrictions.no_struct_literal; - self.restrictions.no_struct_literal = false; - let output = { - let mut inner = delimited!(self, LParen, expression, Comma, RParen); - match inner.len() { - 0 => Ok(Expression::new(self.id_store.fresh(), TupleLiteral(vec![]))), - 1 => Ok(inner.pop().unwrap()), - _ => Ok(Expression::new(self.id_store.fresh(), TupleLiteral(inner))), - } - }; - self.restrictions.no_struct_literal = old_struct_value; - output - } - - #[recursive_descent_method] - fn identifier_expr(&mut self) -> ParseResult { - use self::ExpressionKind::*; - let components = self.qualified_identifier()?; - let qualified_identifier = QualifiedName { id: self.id_store.fresh(), components }; - Ok(match self.token_handler.peek_kind() { - LCurlyBrace if !self.restrictions.no_struct_literal => { - let fields = self.record_block()?; - Expression::new(self.id_store.fresh(), NamedStruct { name: qualified_identifier, fields }) - } - _ => Expression::new(self.id_store.fresh(), Value(qualified_identifier)), - }) - } - - #[recursive_descent_method] - fn qualified_identifier(&mut self) -> ParseResult>> { - let mut components = vec![self.identifier()?]; - - while let (Colon, Colon) = (self.token_handler.peek_kind(), self.token_handler.peek_kind_n(1)) { - self.token_handler.next(); - self.token_handler.next(); - components.push(self.identifier()?); - } - Ok(components) - } - - #[recursive_descent_method] - fn record_block(&mut self) -> ParseResult, Expression)>> { - Ok(delimited!(self, LCurlyBrace, record_entry, Comma, RCurlyBrace)) - } - - #[recursive_descent_method] - fn record_entry(&mut self) -> ParseResult<(Rc, Expression)> { - let field_name = self.identifier()?; - expect!(self, Colon); - let value = self.expression()?; - Ok((field_name, value)) - } - - #[recursive_descent_method] - fn if_expr(&mut self) -> ParseResult { - expect!(self, Keyword(Kw::If)); - let old_struct_value = self.restrictions.no_struct_literal; - self.restrictions.no_struct_literal = true; - let discriminator = if let LCurlyBrace = self.token_handler.peek_kind() { - None - } else { - Some(Box::new(self.expression()?)) - }; - let body = Box::new(self.if_expr_body()?); - self.restrictions.no_struct_literal = old_struct_value; - Ok(Expression::new(self.id_store.fresh(), ExpressionKind::IfExpression { discriminator, body })) - } - - #[recursive_descent_method] - fn if_expr_body(&mut self) -> ParseResult { - match self.token_handler.peek_kind() { - Keyword(Kw::Then) => self.simple_conditional(), - Keyword(Kw::Is) => self.simple_pattern_match(), - _ => self.cond_block(), - } - } - - #[recursive_descent_method] - fn simple_conditional(&mut self) -> ParseResult { - expect!(self, Keyword(Kw::Then)); - let then_case = self.expr_or_block()?; - let else_case = self.else_case()?; - Ok(IfExpressionBody::SimpleConditional { then_case, else_case }) - } - - #[recursive_descent_method] - fn simple_pattern_match(&mut self) -> ParseResult { - expect!(self, Keyword(Kw::Is)); - let pattern = self.pattern()?; - expect!(self, Keyword(Kw::Then)); - let then_case = self.expr_or_block()?; - let else_case = self.else_case()?; - Ok(IfExpressionBody::SimplePatternMatch { pattern, then_case, else_case }) - } - - #[recursive_descent_method] - fn else_case(&mut self) -> ParseResult> { - Ok(if let Keyword(Kw::Else) = self.token_handler.peek_kind() { - self.token_handler.next(); - Some(self.expr_or_block()?) - } else { - None - }) - } - - #[recursive_descent_method] - fn cond_block(&mut self) -> ParseResult { - expect!(self, LCurlyBrace); - let mut cond_arms = vec![]; - loop { - match self.token_handler.peek_kind() { - RCurlyBrace | EOF => break, - Semicolon | Newline => { - self.token_handler.next(); - continue; - } - _ => { - cond_arms.push(self.cond_arm()?); - match self.token_handler.peek_kind() { - Comma | Semicolon | Newline => { - self.token_handler.next(); - continue; - } - _ => break, - } - } - } - } - expect!(self, RCurlyBrace); - Ok(IfExpressionBody::CondList(cond_arms)) - } - - #[recursive_descent_method] - fn cond_arm(&mut self) -> ParseResult { - let (condition, guard) = if let Keyword(Kw::Else) = self.token_handler.peek_kind() { - self.token_handler.next(); - (Condition::Else, None) - } else { - let condition = self.condition()?; - let guard = self.guard()?; - expect!(self, Keyword(Kw::Then)); - (condition, guard) - }; - let body = self.expr_or_block()?; - Ok(ConditionArm { condition, guard, body }) - } - - #[recursive_descent_method] - fn condition(&mut self) -> ParseResult { - Ok(match self.token_handler.peek_kind() { - Keyword(Kw::Is) => { - self.token_handler.next(); - Condition::Pattern(self.pattern()?) - } - ref tok if BinOp::from_sigil_token(tok).is_some() => { - let op = BinOp::from_sigil_token(&self.token_handler.next().kind).unwrap(); - let expr = self.expression()?; - Condition::TruncatedOp(op, expr) - } - //_ => Condition::Expression(self.expression()?), - _ => panic!(), - }) - } - - #[recursive_descent_method] - fn guard(&mut self) -> ParseResult> { - Ok(match self.token_handler.peek_kind() { - Keyword(Kw::If) => { - self.token_handler.next(); - Some(self.expression()?) - } - _ => None, - }) - } - - #[recursive_descent_method] - fn pattern(&mut self) -> ParseResult { - if let LParen = self.token_handler.peek_kind() { - let tuple_pattern_variants = delimited!(self, LParen, pattern, Comma, RParen); - Ok(Pattern::TuplePattern(tuple_pattern_variants)) - } else { - self.simple_pattern() - } - } - - #[recursive_descent_method] - fn simple_pattern(&mut self) -> ParseResult { - Ok(match self.token_handler.peek_kind() { - Identifier(_) => { - let components = self.qualified_identifier()?; - let qualified_identifier = QualifiedName { id: self.id_store.fresh(), components }; - match self.token_handler.peek_kind() { - LCurlyBrace => { - let members = delimited!(self, LCurlyBrace, record_pattern_entry, Comma, RCurlyBrace); - Pattern::Record(qualified_identifier, members) - } - LParen => { - let members = delimited!(self, LParen, pattern, Comma, RParen); - Pattern::TupleStruct(qualified_identifier, members) - } - _ => Pattern::VarOrName(qualified_identifier), - } - } - _ => self.pattern_literal()?, - }) - } - - #[recursive_descent_method] - fn pattern_literal(&mut self) -> ParseResult { - let tok = self.token_handler.peek(); - Ok(match tok.get_kind() { - Keyword(Kw::True) => { - self.token_handler.next(); - Pattern::Literal(PatternLiteral::BoolPattern(true)) - } - Keyword(Kw::False) => { - self.token_handler.next(); - Pattern::Literal(PatternLiteral::BoolPattern(false)) - } - StrLiteral { s, .. } => { - self.token_handler.next(); - Pattern::Literal(PatternLiteral::StringPattern(s)) - } - DigitGroup(_) | HexLiteral(_) | BinNumberSigil | Period => self.signed_number_literal()?, - Operator(ref op) if **op == "-" => self.signed_number_literal()?, - Underscore => { - self.token_handler.next(); - Pattern::Ignored - } - other => return ParseError::new_with_token(format!("{:?} is not a valid Pattern", other), tok), - }) - } - - #[recursive_descent_method] - fn signed_number_literal(&mut self) -> ParseResult { - let neg = match self.token_handler.peek_kind() { - Operator(ref op) if **op == "-" => { - self.token_handler.next(); - true - } - _ => false, - }; - let Expression { kind, .. } = self.number_literal()?; - Ok(Pattern::Literal(PatternLiteral::NumPattern { neg, num: kind })) - } - - #[recursive_descent_method] - fn record_pattern_entry(&mut self) -> ParseResult<(Rc, Pattern)> { - let name = self.identifier()?; - Ok(match self.token_handler.peek_kind() { - Colon => { - expect!(self, Colon); - let pat = self.pattern()?; - (name, pat) - } - _ => { - let qualified_identifier = - QualifiedName { id: self.id_store.fresh(), components: vec![name.clone()] }; - (name, Pattern::VarOrName(qualified_identifier)) - } - }) - } - - #[recursive_descent_method] - fn block(&mut self) -> ParseResult { - let block = delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict); - Ok(block.into()) - } - - #[recursive_descent_method] - fn expr_or_block(&mut self) -> ParseResult { - let tok = self.token_handler.peek(); - match tok.get_kind() { - LCurlyBrace => self.block(), - _ => { - let expr = self.expression()?; - let s = Statement { - id: self.id_store.fresh(), - location: tok.location, - kind: StatementKind::Expression(expr), - }; - Ok(s.into()) - } - } - } - - #[recursive_descent_method] - fn while_expr(&mut self) -> ParseResult { - use self::ExpressionKind::*; - expect!(self, Keyword(Kw::While)); - let condition = { - self.restrictions.no_struct_literal = true; - let x = self.while_cond(); - self.restrictions.no_struct_literal = false; - x?.map(Box::new) - }; - let body = self.block()?; - Ok(Expression::new(self.id_store.fresh(), WhileExpression { condition, body })) - } - - #[recursive_descent_method] - fn while_cond(&mut self) -> ParseResult> { - Ok(match self.token_handler.peek_kind() { - LCurlyBrace => None, - _ => Some(self.expression()?), - }) - } - - #[recursive_descent_method] - fn for_expr(&mut self) -> ParseResult { - expect!(self, Keyword(Kw::For)); - let enumerators = if let LCurlyBrace = self.token_handler.peek_kind() { - delimited!(self, LCurlyBrace, enumerator, Comma | Newline, RCurlyBrace) - } else { - let single_enum = { - self.restrictions.no_struct_literal = true; - let s = self.enumerator(); - self.restrictions.no_struct_literal = false; - s? - }; - vec![single_enum] - }; - let body = Box::new(self.for_expr_body()?); - Ok(Expression::new(self.id_store.fresh(), ExpressionKind::ForExpression { enumerators, body })) - } - - #[recursive_descent_method] - fn enumerator(&mut self) -> ParseResult { - let id = self.identifier()?; - expect!(self, Operator(ref c) if **c == "<-"); - let generator = self.expression()?; - Ok(Enumerator { id, generator }) - } - - #[recursive_descent_method] - fn for_expr_body(&mut self) -> ParseResult { - use self::ForBody::*; - let tok = self.token_handler.peek(); - Ok(match tok.get_kind() { - LCurlyBrace => { - let statements = - delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict); - StatementBlock(statements.into()) - } - Keyword(Kw::Return) => { - self.token_handler.next(); - MonadicReturn(self.expression()?) - } - _ => return ParseError::new_with_token("for expressions must end in a block or 'return'", tok), - }) - } - - #[recursive_descent_method] - fn identifier(&mut self) -> ParseResult> { - let tok = self.token_handler.next(); - match tok.get_kind() { - Identifier(s) => Ok(s), - p => ParseError::new_with_token(format!("Expected an identifier, got {:?}", p), tok), - } - } - - #[recursive_descent_method] - fn literal(&mut self) -> ParseResult { - use self::ExpressionKind::*; - - let tok = self.token_handler.peek(); - match tok.get_kind() { - DigitGroup(_) | HexLiteral(_) | BinNumberSigil | Period => self.number_literal(), - Keyword(Kw::True) => { - self.token_handler.next(); - let id = self.id_store.fresh(); - Ok(Expression::new(id, BoolLiteral(true))) - } - Keyword(Kw::False) => { - self.token_handler.next(); - let id = self.id_store.fresh(); - Ok(Expression::new(id, BoolLiteral(false))) - } - StrLiteral { s, .. } => { - self.token_handler.next(); - let id = self.id_store.fresh(); - Ok(Expression::new(id, StringLiteral(s))) - } - e => ParseError::new_with_token(format!("Expected a literal expression, got {:?}", e), tok), - } - } - - #[recursive_descent_method] - fn number_literal(&mut self) -> ParseResult { - match self.token_handler.peek_kind() { - HexLiteral(_) | BinNumberSigil => self.int_literal(), - _ => self.float_literal(), - } - } - - #[recursive_descent_method] - fn int_literal(&mut self) -> ParseResult { - use self::ExpressionKind::*; - let tok = self.token_handler.next(); - match tok.get_kind() { - BinNumberSigil => { - let digits = self.digits()?; - let n = parse_binary(digits, tok)?; - Ok(Expression::new(self.id_store.fresh(), NatLiteral(n))) - } - HexLiteral(text) => { - let digits: String = text.chars().filter(|c| c.is_digit(16)).collect(); - let n = parse_hex(digits, tok)?; - Ok(Expression::new(self.id_store.fresh(), NatLiteral(n))) - } - _ => return ParseError::new_with_token("Expected '0x' or '0b'", tok), - } - } - - #[recursive_descent_method] - fn float_literal(&mut self) -> ParseResult { - use self::ExpressionKind::*; - let tok = self.token_handler.peek(); - let mut digits = self.digits()?; - if let Period = self.token_handler.peek_kind() { - self.token_handler.next(); - digits.push('.'); - digits.push_str(&self.digits()?); - match digits.parse::() { - Ok(f) => Ok(Expression::new(self.id_store.fresh(), FloatLiteral(f))), - Err(e) => ParseError::new_with_token(format!("Float failed to parse with error: {}", e), tok), - } - } else { - match digits.parse::() { - Ok(d) => Ok(Expression::new(self.id_store.fresh(), NatLiteral(d))), - Err(e) => - ParseError::new_with_token(format!("Integer failed to parse with error: {}", e), tok), - } - } - } - - #[recursive_descent_method] - fn digits(&mut self) -> ParseResult { - let mut ds = String::new(); - loop { - match self.token_handler.peek_kind() { - Underscore => { - self.token_handler.next(); - continue; - } - DigitGroup(ref s) => { - self.token_handler.next(); - ds.push_str(s) - } - _ => break, - } - } - Ok(ds) - } - - #[recursive_descent_method] - fn import_declaration(&mut self) -> ParseResult { - expect!(self, Keyword(Import)); - let mut path_components = vec![self.identifier()?]; - - while let (Colon, Colon) = (self.token_handler.peek_kind(), self.token_handler.peek_kind_n(1)) { - self.token_handler.next(); - self.token_handler.next(); - if let Identifier(_) = self.token_handler.peek_kind() { - path_components.push(self.identifier()?); - } else { - break; - } - } - - let imported_names = match self.token_handler.peek_kind() { - LCurlyBrace => { - let names = delimited!(self, LCurlyBrace, identifier, Comma, RCurlyBrace); - ImportedNames::List(names) - } - Operator(ref s) if **s == "*" => { - self.token_handler.next(); - ImportedNames::All - } - _ => ImportedNames::LastOfPath, - }; - - Ok(ImportSpecifier { id: self.id_store.fresh(), path_components, imported_names }) - } - - #[recursive_descent_method] - fn import_suffix(&mut self) -> ParseResult { - Ok(match self.token_handler.peek_kind() { - Operator(ref s) if **s == "*" => { - self.token_handler.next(); - ImportedNames::All - } - LCurlyBrace => { - let names = delimited!(self, LCurlyBrace, identifier, Comma, RCurlyBrace); - ImportedNames::List(names) - } - _ => return ParseError::new_with_token("Expected '{{' or '*'", self.token_handler.peek()), - }) - } - - #[recursive_descent_method] - fn module_declaration(&mut self) -> ParseResult { - expect!(self, Keyword(Kw::Module)); - let name = self.identifier()?; - let items = delimited!(self, LCurlyBrace, statement, Newline | Semicolon, RCurlyBrace, nonstrict); - Ok(Declaration::Module { name, items: items.into() }) - } -} - -fn parse_binary(digits: String, tok: Token) -> ParseResult { - let mut result: u64 = 0; - let mut multiplier = 1; - for d in digits.chars().rev() { - match d { - '1' => result += multiplier, - '0' => (), - _ => - return ParseError::new_with_token( - "Encountered a character not '1' or '0 while parsing a binary literal", - tok, - ), - } - multiplier = match multiplier.checked_mul(2) { - Some(m) => m, - None => return ParseError::new_with_token("This binary expression will overflow", tok), - } - } - Ok(result) -} - -fn parse_hex(digits: String, tok: Token) -> ParseResult { - let mut result: u64 = 0; - let mut multiplier: u64 = 1; - for d in digits.chars().rev() { - match d.to_digit(16) { - Some(n) => result += n as u64 * multiplier, - None => return ParseError::new_with_token("Encountered a non-hex digit in a hex literal", tok), - } - multiplier = match multiplier.checked_mul(16) { - Some(m) => m, - None => return ParseError::new_with_token("This hex expression will overflow", tok), - } - } - Ok(result) -} diff --git a/schala-lang/language/src/parsing/new.rs b/schala-lang/language/src/parsing/new.rs index 6a11995..1735cfd 100644 --- a/schala-lang/language/src/parsing/new.rs +++ b/schala-lang/language/src/parsing/new.rs @@ -6,7 +6,6 @@ use crate::{ ast::*, identifier::{Id, IdStore}, parsing::ParseError, - schala::SourceReference, }; fn rc_string(s: &str) -> Rc { @@ -30,10 +29,6 @@ impl Parser { ParseError { msg, location: err.location.offset.into(), - token: crate::tokenizing::Token { - kind: crate::tokenizing::TokenKind::Semicolon, - location: Default::default(), - }, } }) } diff --git a/schala-lang/language/src/parsing/test.rs b/schala-lang/language/src/parsing/test.rs index 8bfd0af..50add16 100644 --- a/schala-lang/language/src/parsing/test.rs +++ b/schala-lang/language/src/parsing/test.rs @@ -7,7 +7,7 @@ use std::{fmt::Write, rc::Rc}; use pretty_assertions::assert_eq; use super::new::{schala_parser, Parser}; -use crate::{ast::*, tokenizing::Location}; +use crate::{ast::*, parsing::Location}; fn rc(s: &str) -> Rc { Rc::new(s.to_owned()) diff --git a/schala-lang/language/src/schala.rs b/schala-lang/language/src/schala.rs index 2e8e167..6d55148 100644 --- a/schala-lang/language/src/schala.rs +++ b/schala-lang/language/src/schala.rs @@ -5,7 +5,7 @@ use schala_repl::{ use stopwatch::Stopwatch; use crate::{ - error::SchalaError, parsing, reduced_ir, symbol_table, tokenizing, tree_walk_eval, type_inference, + error::SchalaError, parsing, reduced_ir, symbol_table, tree_walk_eval, type_inference, }; /// All the state necessary to parse and execute a Schala program are stored in this struct. @@ -158,7 +158,6 @@ impl SourceReference { #[allow(dead_code)] #[derive(Clone, Copy, Debug)] pub(crate) enum Stage { - Tokenizing, Parsing, Symbols, ScopeResolution, @@ -168,7 +167,7 @@ pub(crate) enum Stage { } fn stage_names() -> Vec<&'static str> { - vec!["tokenizing", "parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"] + vec!["parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"] } #[derive(Default, Clone)] diff --git a/schala-lang/language/src/symbol_table/mod.rs b/schala-lang/language/src/symbol_table/mod.rs index 82cd38d..7ea1382 100644 --- a/schala-lang/language/src/symbol_table/mod.rs +++ b/schala-lang/language/src/symbol_table/mod.rs @@ -10,7 +10,7 @@ use crate::{ ast, ast::ItemId, builtin::Builtin, - tokenizing::Location, + parsing::Location, type_inference::{TypeContext, TypeId}, }; diff --git a/schala-lang/language/src/symbol_table/populator.rs b/schala-lang/language/src/symbol_table/populator.rs index 2b11747..38b9700 100644 --- a/schala-lang/language/src/symbol_table/populator.rs +++ b/schala-lang/language/src/symbol_table/populator.rs @@ -11,7 +11,7 @@ use crate::{ TypeSingletonName, Variant, VariantKind, AST, }, builtin::Builtin, - tokenizing::Location, + parsing::Location, type_inference::{self, PendingType, TypeBuilder, TypeContext, VariantBuilder}, }; diff --git a/schala-lang/language/src/symbol_table/test.rs b/schala-lang/language/src/symbol_table/test.rs index 29587ef..381eb5e 100644 --- a/schala-lang/language/src/symbol_table/test.rs +++ b/schala-lang/language/src/symbol_table/test.rs @@ -2,7 +2,7 @@ use assert_matches::assert_matches; use super::*; -use crate::{tokenizing::Location, util::quick_ast}; +use crate::util::quick_ast; fn add_symbols(src: &str) -> (SymbolTable, Result<(), Vec>) { let ast = quick_ast(src); @@ -79,7 +79,7 @@ fn no_type_definition_duplicates() { let err = &errs[0]; match err { - SymbolError::DuplicateName { location, prev_name } => { + SymbolError::DuplicateName { location: _, prev_name } => { assert_eq!(prev_name, &Fqsn::from_strs(&["Food"])); //TODO restore this Location test diff --git a/schala-lang/language/src/tokenizing.rs b/schala-lang/language/src/tokenizing.rs deleted file mode 100644 index 3b311dc..0000000 --- a/schala-lang/language/src/tokenizing.rs +++ /dev/null @@ -1,464 +0,0 @@ -#![allow(clippy::upper_case_acronyms)] - -use std::{ - convert::{TryFrom, TryInto}, - fmt, - iter::{Iterator, Peekable}, - rc::Rc, -}; - -use itertools::Itertools; - -/// A location in a particular source file. Note that the -/// sizes of the internal unsigned integer types limit -/// the size of a source file to 2^32 lines of -/// at most 2^16 characters, which should be plenty big. -#[derive(Debug, Clone, Copy, PartialEq, Default)] -pub struct Location { - pub(crate) offset: usize, -} - -impl From for Location { - fn from(offset: usize) -> Self { - Self { offset } - } -} - -impl fmt::Display for Location { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.offset) - } -} - -#[derive(Debug, PartialEq, Clone)] -pub enum TokenKind { - Newline, - Semicolon, - - LParen, - RParen, - LSquareBracket, - RSquareBracket, - LAngleBracket, - RAngleBracket, - LCurlyBrace, - RCurlyBrace, - Pipe, - Backslash, - AtSign, - - Comma, - Period, - Colon, - Underscore, - Slash, - Equals, - - Operator(Rc), - DigitGroup(Rc), - HexLiteral(Rc), - BinNumberSigil, - StrLiteral { s: Rc, prefix: Option> }, - Identifier(Rc), - Keyword(Kw), - - EOF, - - Error(String), -} -use self::TokenKind::*; - -impl fmt::Display for TokenKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - &Operator(ref s) => write!(f, "Operator({})", **s), - &DigitGroup(ref s) => write!(f, "DigitGroup({})", s), - &HexLiteral(ref s) => write!(f, "HexLiteral({})", s), - &StrLiteral { ref s, .. } => write!(f, "StrLiteral({})", s), - &Identifier(ref s) => write!(f, "Identifier({})", s), - &Error(ref s) => write!(f, "Error({})", s), - other => write!(f, "{:?}", other), - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Kw { - If, - Then, - Else, - Is, - Func, - For, - While, - Let, - In, - Mut, - Return, - Continue, - Break, - Alias, - Type, - SelfType, - SelfIdent, - Interface, - Impl, - True, - False, - Module, - Import, -} - -impl TryFrom<&str> for Kw { - type Error = (); - - fn try_from(value: &str) -> Result { - Ok(match value { - "if" => Kw::If, - "then" => Kw::Then, - "else" => Kw::Else, - "is" => Kw::Is, - "fn" => Kw::Func, - "for" => Kw::For, - "while" => Kw::While, - "let" => Kw::Let, - "in" => Kw::In, - "mut" => Kw::Mut, - "return" => Kw::Return, - "break" => Kw::Break, - "continue" => Kw::Continue, - "alias" => Kw::Alias, - "type" => Kw::Type, - "Self" => Kw::SelfType, - "self" => Kw::SelfIdent, - "interface" => Kw::Interface, - "impl" => Kw::Impl, - "true" => Kw::True, - "false" => Kw::False, - "module" => Kw::Module, - "import" => Kw::Import, - _ => return Err(()), - }) - } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct Token { - pub kind: TokenKind, - pub(crate) location: Location, -} - -impl Token { - pub fn to_string_with_metadata(&self) -> String { - format!("{}({})", self.kind, self.location) - } - - pub fn get_kind(&self) -> TokenKind { - self.kind.clone() - } -} - -const OPERATOR_CHARS: [char; 17] = - ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`']; -fn is_operator(c: &char) -> bool { - OPERATOR_CHARS.iter().any(|x| x == c) -} - -type CharData = (usize, usize, char); - -pub fn tokenize(input: &str) -> Vec { - let mut tokens: Vec = Vec::new(); - - let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n")) - .flat_map(|(line_idx, line)| line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch))) - .peekable(); - - while let Some((line_num, char_num, c)) = input.next() { - let cur_tok_kind = match c { - '/' => match input.peek().map(|t| t.2) { - Some('/') => { - for (_, _, c) in input.by_ref() { - if c == '\n' { - break; - } - } - continue; - } - Some('*') => { - input.next(); - let mut comment_level = 1; - while let Some((_, _, c)) = input.next() { - if c == '*' && input.peek().map(|t| t.2) == Some('/') { - input.next(); - comment_level -= 1; - } else if c == '/' && input.peek().map(|t| t.2) == Some('*') { - input.next(); - comment_level += 1; - } - if comment_level == 0 { - break; - } - } - if comment_level != 0 { - Error("Unclosed comment".to_string()) - } else { - continue; - } - } - _ => Slash, - }, - c if c.is_whitespace() && c != '\n' => continue, - '\n' => Newline, - ';' => Semicolon, - ':' => Colon, - ',' => Comma, - '(' => LParen, - ')' => RParen, - '{' => LCurlyBrace, - '}' => RCurlyBrace, - '[' => LSquareBracket, - ']' => RSquareBracket, - '"' => handle_quote(&mut input, None), - '\\' => Backslash, - '@' => AtSign, - c if c.is_digit(10) => handle_digit(c, &mut input), - c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input), - c if is_operator(&c) => handle_operator(c, &mut input), - unknown => Error(format!("Unexpected character: {}", unknown)), - }; - let location = Location { offset: 0 }; - tokens.push(Token { kind: cur_tok_kind, location }); - } - tokens -} - -fn handle_digit(c: char, input: &mut Peekable>) -> TokenKind { - let next_ch = input.peek().map(|&(_, _, c)| c); - - if c == '0' && next_ch == Some('x') { - input.next(); - let rest: String = input - .peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_') - .map(|(_, _, c)| c) - .collect(); - HexLiteral(Rc::new(rest)) - } else if c == '0' && next_ch == Some('b') { - input.next(); - BinNumberSigil - } else { - let mut buf = c.to_string(); - buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| c)); - DigitGroup(Rc::new(buf)) - } -} - -fn handle_quote( - input: &mut Peekable>, - quote_prefix: Option<&str>, -) -> TokenKind { - let mut buf = String::new(); - loop { - match input.next().map(|(_, _, c)| c) { - Some('"') => break, - Some('\\') => { - let next = input.peek().map(|&(_, _, c)| c); - if next == Some('n') { - input.next(); - buf.push('\n') - } else if next == Some('"') { - input.next(); - buf.push('"'); - } else if next == Some('t') { - input.next(); - buf.push('\t'); - } - } - Some(c) => buf.push(c), - None => return TokenKind::Error("Unclosed string".to_string()), - } - } - TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) } -} - -fn handle_alphabetic(c: char, input: &mut Peekable>) -> TokenKind { - let mut buf = String::new(); - buf.push(c); - let next_is_alphabetic = input.peek().map(|&(_, _, c)| !c.is_alphabetic()).unwrap_or(true); - if c == '_' && next_is_alphabetic { - return TokenKind::Underscore; - } - - loop { - match input.peek().map(|&(_, _, c)| c) { - Some(c) if c == '"' => { - input.next(); - return handle_quote(input, Some(&buf)); - } - Some(c) if c.is_alphanumeric() || c == '_' => { - input.next(); - buf.push(c); - } - _ => break, - } - } - - match Kw::try_from(buf.as_str()) { - Ok(kw) => TokenKind::Keyword(kw), - Err(()) => TokenKind::Identifier(Rc::new(buf)), - } -} - -fn handle_operator(c: char, input: &mut Peekable>) -> TokenKind { - match c { - '<' | '>' | '|' | '.' | '=' => { - let next = &input.peek().map(|&(_, _, c)| c); - let next_is_op = next.map(|n| is_operator(&n)).unwrap_or(false); - if !next_is_op { - return match c { - '<' => LAngleBracket, - '>' => RAngleBracket, - '|' => Pipe, - '.' => Period, - '=' => Equals, - _ => unreachable!(), - }; - } - } - _ => (), - }; - - let mut buf = String::new(); - - if c == '`' { - loop { - match input.peek().map(|&(_, _, c)| c) { - Some(c) if c.is_alphabetic() || c == '_' => { - input.next(); - buf.push(c); - } - Some('`') => { - input.next(); - break; - } - _ => break, - } - } - } else { - buf.push(c); - loop { - match input.peek().map(|&(_, _, c)| c) { - Some(c) if is_operator(&c) => { - input.next(); - buf.push(c); - } - _ => break, - } - } - } - TokenKind::Operator(Rc::new(buf)) -} - -#[cfg(test)] -mod schala_tokenizer_tests { - use super::{Kw::*, *}; - - macro_rules! digit { - ($ident:expr) => { - DigitGroup(Rc::new($ident.to_string())) - }; - } - macro_rules! ident { - ($ident:expr) => { - Identifier(Rc::new($ident.to_string())) - }; - } - macro_rules! op { - ($ident:expr) => { - Operator(Rc::new($ident.to_string())) - }; - } - - fn token_kinds(input: &str) -> Vec { - tokenize(input).into_iter().map(move |tok| tok.kind).collect() - } - - #[test] - fn tokens() { - let output = token_kinds("let a: A = c ++ d"); - assert_eq!( - output, - vec![ - Keyword(Let), - ident!("a"), - Colon, - ident!("A"), - LAngleBracket, - ident!("B"), - RAngleBracket, - Equals, - ident!("c"), - op!("++"), - ident!("d") - ] - ); - } - - #[test] - fn underscores() { - let output = token_kinds("4_8"); - assert_eq!(output, vec![digit!("4"), Underscore, digit!("8")]); - - let output = token_kinds("aba_yo"); - assert_eq!(output, vec![ident!("aba_yo")]); - } - - #[test] - fn comments() { - let output = token_kinds("1 + /* hella /* bro */ */ 2"); - assert_eq!(output, vec![digit!("1"), op!("+"), digit!("2")]); - - let output = token_kinds("1 + /* hella /* bro */ 2"); - assert_eq!(output, vec![digit!("1"), op!("+"), Error("Unclosed comment".to_string())]); - - //TODO not sure if I want this behavior - let output = token_kinds("1 + /* hella */ bro */ 2"); - assert_eq!( - output, - vec![ - digit!("1"), - op!("+"), - Identifier(Rc::new("bro".to_string())), - Operator(Rc::new("*".to_string())), - Slash, - DigitGroup(Rc::new("2".to_string())) - ] - ); - } - - #[test] - fn backtick_operators() { - let output = token_kinds("1 `plus` 2"); - assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]); - } - - #[test] - fn string_literals() { - let output = token_kinds(r#""some string""#); - assert_eq!(output, vec![StrLiteral { s: Rc::new("some string".to_string()), prefix: None }]); - - let output = token_kinds(r#"b"some bytestring""#); - assert_eq!( - output, - vec![StrLiteral { - s: Rc::new("some bytestring".to_string()), - prefix: Some(Rc::new("b".to_string())) - }] - ); - - let output = token_kinds(r#""Do \n \" escapes work\t""#); - assert_eq!( - output, - vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }] - ); - } -}