Delete a bunch of now-obsolete parsing/tokenizing code

2021-11-14 03:18:05 -08:00
parent 05e1555a9b
commit 94ee3e1897
12 changed files with 21 additions and 1782 deletions
--- a/schala-lang/language/src/ast/mod.rs
+++ b/schala-lang/language/src/ast/mod.rs
@@ -17,7 +17,7 @@ pub use visitor::*;
 use crate::{
    derivative::Derivative,
    identifier::{define_id_kind, Id},
-    tokenizing::Location,
+    parsing::Location,
 };

 define_id_kind!(ASTItem);
--- a/schala-lang/language/src/ast/operators.rs
+++ b/schala-lang/language/src/ast/operators.rs
@@ -1,7 +1,5 @@
 use std::rc::Rc;

-use crate::tokenizing::TokenKind;
-
 #[derive(Debug, PartialEq, Clone)]
 pub struct PrefixOp {
    sigil: Rc<String>,
@@ -15,10 +13,6 @@ impl PrefixOp {
    pub fn sigil(&self) -> &str {
        &self.sigil
    }
-
-    pub fn is_prefix(op: &str) -> bool {
-        matches!(op, "+" | "-" | "!")
-    }
 }

 #[derive(Debug, PartialEq, Clone)]
@@ -35,38 +29,14 @@ impl BinOp {
        &self.sigil
    }

-    pub fn from_sigil_token(tok: &TokenKind) -> Option<BinOp> {
-        let s = token_kind_to_sigil(tok)?;
-        Some(BinOp::from_sigil(s))
-    }
-
    pub fn min_precedence() -> i32 {
        i32::min_value()
    }
-    pub fn get_precedence_from_token(op_tok: &TokenKind) -> Option<i32> {
-        let s = token_kind_to_sigil(op_tok)?;
-        Some(binop_precedences(s))
-    }
-
    pub fn get_precedence(&self) -> i32 {
        binop_precedences(self.sigil.as_ref())
    }
 }

-fn token_kind_to_sigil(tok: &TokenKind) -> Option<&str> {
-    use self::TokenKind::*;
-    Some(match tok {
-        Operator(op) => op.as_str(),
-        Period => ".",
-        Pipe => "|",
-        Slash => "/",
-        LAngleBracket => "<",
-        RAngleBracket => ">",
-        Equals => "=",
-        _ => return None,
-    })
-}
-
 fn binop_precedences(s: &str) -> i32 {
    let default = 10_000_000;
    match s {
--- a/schala-lang/language/src/error.rs
+++ b/schala-lang/language/src/error.rs
@@ -1,8 +1,7 @@
 use crate::{
-    parsing::ParseError,
+    parsing::{Location, ParseError},
    schala::{SourceReference, Stage},
    symbol_table::SymbolError,
-    tokenizing::{Location, Token, TokenKind},
    type_inference::TypeError,
 };

@@ -52,26 +51,6 @@ impl SchalaError {
            errors: vec![],
        }
    }
-
-    pub(crate) fn from_tokens(tokens: &[Token]) -> Option<SchalaError> {
-        let token_errors: Vec<Error> = tokens
-            .iter()
-            .filter_map(|tok| match tok.kind {
-                TokenKind::Error(ref err) => Some(Error {
-                    location: Some(tok.location),
-                    text: Some(err.clone()),
-                    stage: Stage::Tokenizing,
-                }),
-                _ => None,
-            })
-            .collect();
-
-        if token_errors.is_empty() {
-            None
-        } else {
-            Some(SchalaError { errors: token_errors, formatted_parse_error: None })
-        }
-    }
 }

 #[allow(dead_code)]
--- a/schala-lang/language/src/lib.rs
+++ b/schala-lang/language/src/lib.rs
@@ -7,7 +7,6 @@
 //! `ProgrammingLanguageInterface` and the chain of compiler passes for it.

 extern crate schala_repl;
-#[macro_use]
 extern crate schala_lang_codegen;
 extern crate derivative;

@@ -19,7 +18,6 @@ mod type_inference;

 mod ast;
 mod parsing;
-mod tokenizing;
 #[macro_use]
 mod symbol_table;
 mod builtin;
--- a/schala-lang/language/src/parsing/mod.rs
+++ b/schala-lang/language/src/parsing/mod.rs
--- a/schala-lang/language/src/parsing/new.rs
+++ b/schala-lang/language/src/parsing/new.rs
@@ -6,7 +6,6 @@ use crate::{
    ast::*,
    identifier::{Id, IdStore},
    parsing::ParseError,
-    schala::SourceReference,
 };

 fn rc_string(s: &str) -> Rc<String> {
@@ -30,10 +29,6 @@ impl Parser {
            ParseError {
                msg,
                location: err.location.offset.into(),
-                token: crate::tokenizing::Token {
-                    kind: crate::tokenizing::TokenKind::Semicolon,
-                    location: Default::default(),
-                },
            }
        })
    }
--- a/schala-lang/language/src/parsing/test.rs
+++ b/schala-lang/language/src/parsing/test.rs
@@ -7,7 +7,7 @@ use std::{fmt::Write, rc::Rc};
 use pretty_assertions::assert_eq;

 use super::new::{schala_parser, Parser};
-use crate::{ast::*, tokenizing::Location};
+use crate::{ast::*, parsing::Location};

 fn rc(s: &str) -> Rc<String> {
    Rc::new(s.to_owned())
--- a/schala-lang/language/src/schala.rs
+++ b/schala-lang/language/src/schala.rs
@@ -5,7 +5,7 @@ use schala_repl::{
 use stopwatch::Stopwatch;

 use crate::{
-    error::SchalaError, parsing, reduced_ir, symbol_table, tokenizing, tree_walk_eval, type_inference,
+    error::SchalaError, parsing, reduced_ir, symbol_table, tree_walk_eval, type_inference,
 };

 /// All the state necessary to parse and execute a Schala program are stored in this struct.
@@ -158,7 +158,6 @@ impl SourceReference {
 #[allow(dead_code)]
 #[derive(Clone, Copy, Debug)]
 pub(crate) enum Stage {
-    Tokenizing,
    Parsing,
    Symbols,
    ScopeResolution,
@@ -168,7 +167,7 @@ pub(crate) enum Stage {
 }

 fn stage_names() -> Vec<&'static str> {
-    vec!["tokenizing", "parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
+    vec!["parsing", "symbol-table", "typechecking", "ast-reduction", "ast-walking-evaluation"]
 }

 #[derive(Default, Clone)]
--- a/schala-lang/language/src/symbol_table/mod.rs
+++ b/schala-lang/language/src/symbol_table/mod.rs
@@ -10,7 +10,7 @@ use crate::{
    ast,
    ast::ItemId,
    builtin::Builtin,
-    tokenizing::Location,
+    parsing::Location,
    type_inference::{TypeContext, TypeId},
 };

--- a/schala-lang/language/src/symbol_table/populator.rs
+++ b/schala-lang/language/src/symbol_table/populator.rs
@@ -11,7 +11,7 @@ use crate::{
        TypeSingletonName, Variant, VariantKind, AST,
    },
    builtin::Builtin,
-    tokenizing::Location,
+    parsing::Location,
    type_inference::{self, PendingType, TypeBuilder, TypeContext, VariantBuilder},
 };

--- a/schala-lang/language/src/symbol_table/test.rs
+++ b/schala-lang/language/src/symbol_table/test.rs
@@ -2,7 +2,7 @@
 use assert_matches::assert_matches;

 use super::*;
-use crate::{tokenizing::Location, util::quick_ast};
+use crate::util::quick_ast;

 fn add_symbols(src: &str) -> (SymbolTable, Result<(), Vec<SymbolError>>) {
    let ast = quick_ast(src);
@@ -79,7 +79,7 @@ fn no_type_definition_duplicates() {
    let err = &errs[0];

    match err {
-        SymbolError::DuplicateName { location, prev_name } => {
+        SymbolError::DuplicateName { location: _, prev_name } => {
            assert_eq!(prev_name, &Fqsn::from_strs(&["Food"]));

            //TODO restore this Location test
--- a/schala-lang/language/src/tokenizing.rs
+++ b/schala-lang/language/src/tokenizing.rs
@@ -1,464 +0,0 @@
-#![allow(clippy::upper_case_acronyms)]
-
-use std::{
-    convert::{TryFrom, TryInto},
-    fmt,
-    iter::{Iterator, Peekable},
-    rc::Rc,
-};
-
-use itertools::Itertools;
-
-/// A location in a particular source file. Note that the
-/// sizes of the internal unsigned integer types limit
-/// the size of a source file to 2^32 lines of
-/// at most 2^16 characters, which should be plenty big.
-#[derive(Debug, Clone, Copy, PartialEq, Default)]
-pub struct Location {
-    pub(crate) offset: usize,
-}
-
-impl From<usize> for Location {
-    fn from(offset: usize) -> Self {
-        Self { offset }
-    }
-}
-
-impl fmt::Display for Location {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}", self.offset)
-    }
-}
-
-#[derive(Debug, PartialEq, Clone)]
-pub enum TokenKind {
-    Newline,
-    Semicolon,
-
-    LParen,
-    RParen,
-    LSquareBracket,
-    RSquareBracket,
-    LAngleBracket,
-    RAngleBracket,
-    LCurlyBrace,
-    RCurlyBrace,
-    Pipe,
-    Backslash,
-    AtSign,
-
-    Comma,
-    Period,
-    Colon,
-    Underscore,
-    Slash,
-    Equals,
-
-    Operator(Rc<String>),
-    DigitGroup(Rc<String>),
-    HexLiteral(Rc<String>),
-    BinNumberSigil,
-    StrLiteral { s: Rc<String>, prefix: Option<Rc<String>> },
-    Identifier(Rc<String>),
-    Keyword(Kw),
-
-    EOF,
-
-    Error(String),
-}
-use self::TokenKind::*;
-
-impl fmt::Display for TokenKind {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            &Operator(ref s) => write!(f, "Operator({})", **s),
-            &DigitGroup(ref s) => write!(f, "DigitGroup({})", s),
-            &HexLiteral(ref s) => write!(f, "HexLiteral({})", s),
-            &StrLiteral { ref s, .. } => write!(f, "StrLiteral({})", s),
-            &Identifier(ref s) => write!(f, "Identifier({})", s),
-            &Error(ref s) => write!(f, "Error({})", s),
-            other => write!(f, "{:?}", other),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum Kw {
-    If,
-    Then,
-    Else,
-    Is,
-    Func,
-    For,
-    While,
-    Let,
-    In,
-    Mut,
-    Return,
-    Continue,
-    Break,
-    Alias,
-    Type,
-    SelfType,
-    SelfIdent,
-    Interface,
-    Impl,
-    True,
-    False,
-    Module,
-    Import,
-}
-
-impl TryFrom<&str> for Kw {
-    type Error = ();
-
-    fn try_from(value: &str) -> Result<Self, Self::Error> {
-        Ok(match value {
-            "if" => Kw::If,
-            "then" => Kw::Then,
-            "else" => Kw::Else,
-            "is" => Kw::Is,
-            "fn" => Kw::Func,
-            "for" => Kw::For,
-            "while" => Kw::While,
-            "let" => Kw::Let,
-            "in" => Kw::In,
-            "mut" => Kw::Mut,
-            "return" => Kw::Return,
-            "break" => Kw::Break,
-            "continue" => Kw::Continue,
-            "alias" => Kw::Alias,
-            "type" => Kw::Type,
-            "Self" => Kw::SelfType,
-            "self" => Kw::SelfIdent,
-            "interface" => Kw::Interface,
-            "impl" => Kw::Impl,
-            "true" => Kw::True,
-            "false" => Kw::False,
-            "module" => Kw::Module,
-            "import" => Kw::Import,
-            _ => return Err(()),
-        })
-    }
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct Token {
-    pub kind: TokenKind,
-    pub(crate) location: Location,
-}
-
-impl Token {
-    pub fn to_string_with_metadata(&self) -> String {
-        format!("{}({})", self.kind, self.location)
-    }
-
-    pub fn get_kind(&self) -> TokenKind {
-        self.kind.clone()
-    }
-}
-
-const OPERATOR_CHARS: [char; 17] =
-    ['!', '$', '%', '&', '*', '+', '-', '.', ':', '<', '>', '=', '?', '^', '|', '~', '`'];
-fn is_operator(c: &char) -> bool {
-    OPERATOR_CHARS.iter().any(|x| x == c)
-}
-
-type CharData = (usize, usize, char);
-
-pub fn tokenize(input: &str) -> Vec<Token> {
-    let mut tokens: Vec<Token> = Vec::new();
-
-    let mut input = Iterator::intersperse(input.lines().enumerate(), (0, "\n"))
-        .flat_map(|(line_idx, line)| line.chars().enumerate().map(move |(ch_idx, ch)| (line_idx, ch_idx, ch)))
-        .peekable();
-
-    while let Some((line_num, char_num, c)) = input.next() {
-        let cur_tok_kind = match c {
-            '/' => match input.peek().map(|t| t.2) {
-                Some('/') => {
-                    for (_, _, c) in input.by_ref() {
-                        if c == '\n' {
-                            break;
-                        }
-                    }
-                    continue;
-                }
-                Some('*') => {
-                    input.next();
-                    let mut comment_level = 1;
-                    while let Some((_, _, c)) = input.next() {
-                        if c == '*' && input.peek().map(|t| t.2) == Some('/') {
-                            input.next();
-                            comment_level -= 1;
-                        } else if c == '/' && input.peek().map(|t| t.2) == Some('*') {
-                            input.next();
-                            comment_level += 1;
-                        }
-                        if comment_level == 0 {
-                            break;
-                        }
-                    }
-                    if comment_level != 0 {
-                        Error("Unclosed comment".to_string())
-                    } else {
-                        continue;
-                    }
-                }
-                _ => Slash,
-            },
-            c if c.is_whitespace() && c != '\n' => continue,
-            '\n' => Newline,
-            ';' => Semicolon,
-            ':' => Colon,
-            ',' => Comma,
-            '(' => LParen,
-            ')' => RParen,
-            '{' => LCurlyBrace,
-            '}' => RCurlyBrace,
-            '[' => LSquareBracket,
-            ']' => RSquareBracket,
-            '"' => handle_quote(&mut input, None),
-            '\\' => Backslash,
-            '@' => AtSign,
-            c if c.is_digit(10) => handle_digit(c, &mut input),
-            c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input),
-            c if is_operator(&c) => handle_operator(c, &mut input),
-            unknown => Error(format!("Unexpected character: {}", unknown)),
-        };
-        let location = Location { offset: 0 };
-        tokens.push(Token { kind: cur_tok_kind, location });
-    }
-    tokens
-}
-
-fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
-    let next_ch = input.peek().map(|&(_, _, c)| c);
-
-    if c == '0' && next_ch == Some('x') {
-        input.next();
-        let rest: String = input
-            .peeking_take_while(|&(_, _, ref c)| c.is_digit(16) || *c == '_')
-            .map(|(_, _, c)| c)
-            .collect();
-        HexLiteral(Rc::new(rest))
-    } else if c == '0' && next_ch == Some('b') {
-        input.next();
-        BinNumberSigil
-    } else {
-        let mut buf = c.to_string();
-        buf.extend(input.peeking_take_while(|&(_, _, ref c)| c.is_digit(10)).map(|(_, _, c)| c));
-        DigitGroup(Rc::new(buf))
-    }
-}
-
-fn handle_quote(
-    input: &mut Peekable<impl Iterator<Item = CharData>>,
-    quote_prefix: Option<&str>,
-) -> TokenKind {
-    let mut buf = String::new();
-    loop {
-        match input.next().map(|(_, _, c)| c) {
-            Some('"') => break,
-            Some('\\') => {
-                let next = input.peek().map(|&(_, _, c)| c);
-                if next == Some('n') {
-                    input.next();
-                    buf.push('\n')
-                } else if next == Some('"') {
-                    input.next();
-                    buf.push('"');
-                } else if next == Some('t') {
-                    input.next();
-                    buf.push('\t');
-                }
-            }
-            Some(c) => buf.push(c),
-            None => return TokenKind::Error("Unclosed string".to_string()),
-        }
-    }
-    TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) }
-}
-
-fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
-    let mut buf = String::new();
-    buf.push(c);
-    let next_is_alphabetic = input.peek().map(|&(_, _, c)| !c.is_alphabetic()).unwrap_or(true);
-    if c == '_' && next_is_alphabetic {
-        return TokenKind::Underscore;
-    }
-
-    loop {
-        match input.peek().map(|&(_, _, c)| c) {
-            Some(c) if c == '"' => {
-                input.next();
-                return handle_quote(input, Some(&buf));
-            }
-            Some(c) if c.is_alphanumeric() || c == '_' => {
-                input.next();
-                buf.push(c);
-            }
-            _ => break,
-        }
-    }
-
-    match Kw::try_from(buf.as_str()) {
-        Ok(kw) => TokenKind::Keyword(kw),
-        Err(()) => TokenKind::Identifier(Rc::new(buf)),
-    }
-}
-
-fn handle_operator(c: char, input: &mut Peekable<impl Iterator<Item = CharData>>) -> TokenKind {
-    match c {
-        '<' | '>' | '|' | '.' | '=' => {
-            let next = &input.peek().map(|&(_, _, c)| c);
-            let next_is_op = next.map(|n| is_operator(&n)).unwrap_or(false);
-            if !next_is_op {
-                return match c {
-                    '<' => LAngleBracket,
-                    '>' => RAngleBracket,
-                    '|' => Pipe,
-                    '.' => Period,
-                    '=' => Equals,
-                    _ => unreachable!(),
-                };
-            }
-        }
-        _ => (),
-    };
-
-    let mut buf = String::new();
-
-    if c == '`' {
-        loop {
-            match input.peek().map(|&(_, _, c)| c) {
-                Some(c) if c.is_alphabetic() || c == '_' => {
-                    input.next();
-                    buf.push(c);
-                }
-                Some('`') => {
-                    input.next();
-                    break;
-                }
-                _ => break,
-            }
-        }
-    } else {
-        buf.push(c);
-        loop {
-            match input.peek().map(|&(_, _, c)| c) {
-                Some(c) if is_operator(&c) => {
-                    input.next();
-                    buf.push(c);
-                }
-                _ => break,
-            }
-        }
-    }
-    TokenKind::Operator(Rc::new(buf))
-}
-
-#[cfg(test)]
-mod schala_tokenizer_tests {
-    use super::{Kw::*, *};
-
-    macro_rules! digit {
-        ($ident:expr) => {
-            DigitGroup(Rc::new($ident.to_string()))
-        };
-    }
-    macro_rules! ident {
-        ($ident:expr) => {
-            Identifier(Rc::new($ident.to_string()))
-        };
-    }
-    macro_rules! op {
-        ($ident:expr) => {
-            Operator(Rc::new($ident.to_string()))
-        };
-    }
-
-    fn token_kinds(input: &str) -> Vec<TokenKind> {
-        tokenize(input).into_iter().map(move |tok| tok.kind).collect()
-    }
-
-    #[test]
-    fn tokens() {
-        let output = token_kinds("let a: A<B> = c ++ d");
-        assert_eq!(
-            output,
-            vec![
-                Keyword(Let),
-                ident!("a"),
-                Colon,
-                ident!("A"),
-                LAngleBracket,
-                ident!("B"),
-                RAngleBracket,
-                Equals,
-                ident!("c"),
-                op!("++"),
-                ident!("d")
-            ]
-        );
-    }
-
-    #[test]
-    fn underscores() {
-        let output = token_kinds("4_8");
-        assert_eq!(output, vec![digit!("4"), Underscore, digit!("8")]);
-
-        let output = token_kinds("aba_yo");
-        assert_eq!(output, vec![ident!("aba_yo")]);
-    }
-
-    #[test]
-    fn comments() {
-        let output = token_kinds("1  + /* hella /* bro */ */ 2");
-        assert_eq!(output, vec![digit!("1"), op!("+"), digit!("2")]);
-
-        let output = token_kinds("1  + /* hella /* bro */ 2");
-        assert_eq!(output, vec![digit!("1"), op!("+"), Error("Unclosed comment".to_string())]);
-
-        //TODO not sure if I want this behavior
-        let output = token_kinds("1  + /* hella */ bro */ 2");
-        assert_eq!(
-            output,
-            vec![
-                digit!("1"),
-                op!("+"),
-                Identifier(Rc::new("bro".to_string())),
-                Operator(Rc::new("*".to_string())),
-                Slash,
-                DigitGroup(Rc::new("2".to_string()))
-            ]
-        );
-    }
-
-    #[test]
-    fn backtick_operators() {
-        let output = token_kinds("1 `plus` 2");
-        assert_eq!(output, vec![digit!("1"), op!("plus"), digit!("2")]);
-    }
-
-    #[test]
-    fn string_literals() {
-        let output = token_kinds(r#""some string""#);
-        assert_eq!(output, vec![StrLiteral { s: Rc::new("some string".to_string()), prefix: None }]);
-
-        let output = token_kinds(r#"b"some bytestring""#);
-        assert_eq!(
-            output,
-            vec![StrLiteral {
-                s: Rc::new("some bytestring".to_string()),
-                prefix: Some(Rc::new("b".to_string()))
-            }]
-        );
-
-        let output = token_kinds(r#""Do \n \" escapes work\t""#);
-        assert_eq!(
-            output,
-            vec![StrLiteral { s: Rc::new("Do \n \" escapes work\t".to_string()), prefix: None }]
-        );
-    }
-}