From d8f6c41f048267467cbf02281f2bacf85b4c2e90 Mon Sep 17 00:00:00 2001 From: Greg Shuflin Date: Thu, 21 Oct 2021 15:23:48 -0700 Subject: [PATCH] Start re-writing reduced ast and evaluator --- schala-lang/language/src/ast/mod.rs | 12 +- schala-lang/language/src/lib.rs | 2 + schala-lang/language/src/reduced_ir/mod.rs | 274 ++++++++++++++++++ schala-lang/language/src/reduced_ir/test.rs | 42 +++ schala-lang/language/src/schala.rs | 35 ++- schala-lang/language/src/symbol_table/mod.rs | 47 ++- .../language/src/symbol_table/resolver.rs | 8 +- .../language/src/tree_walk_eval/mod.rs | 144 +++++++++ 8 files changed, 550 insertions(+), 14 deletions(-) create mode 100644 schala-lang/language/src/reduced_ir/mod.rs create mode 100644 schala-lang/language/src/reduced_ir/test.rs create mode 100644 schala-lang/language/src/tree_walk_eval/mod.rs diff --git a/schala-lang/language/src/ast/mod.rs b/schala-lang/language/src/ast/mod.rs index 55dda76..c763b11 100644 --- a/schala-lang/language/src/ast/mod.rs +++ b/schala-lang/language/src/ast/mod.rs @@ -1,13 +1,15 @@ #![allow(clippy::upper_case_acronyms)] #![allow(clippy::enum_variant_names)] +use std::rc::Rc; +use std::fmt; + mod visitor; mod operators; pub use operators::{PrefixOp, BinOp}; pub use visitor::{walk_ast, walk_block, ASTVisitor}; -use std::rc::Rc; use crate::derivative::Derivative; use crate::tokenizing::Location; @@ -15,7 +17,7 @@ use crate::tokenizing::Location; /// the u32 index limits the size of an AST to 2^32 nodes. #[derive(Debug, PartialEq, Eq, Hash, Clone, Default)] pub struct ItemId { - idx: u32, + pub idx: u32, } impl ItemId { @@ -24,6 +26,12 @@ impl ItemId { } } +impl fmt::Display for ItemId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ItemId:{}", self.idx) + } +} + pub struct ItemIdStore { last_idx: u32 } diff --git a/schala-lang/language/src/lib.rs b/schala-lang/language/src/lib.rs index f5d6624..bfce505 100644 --- a/schala-lang/language/src/lib.rs +++ b/schala-lang/language/src/lib.rs @@ -25,6 +25,8 @@ mod builtin; mod error; mod eval; mod reduced_ast; +mod reduced_ir; +mod tree_walk_eval; mod schala; diff --git a/schala-lang/language/src/reduced_ir/mod.rs b/schala-lang/language/src/reduced_ir/mod.rs new file mode 100644 index 0000000..a8ea03b --- /dev/null +++ b/schala-lang/language/src/reduced_ir/mod.rs @@ -0,0 +1,274 @@ +use crate::ast; +use crate::symbol_table::{DefId, Symbol, SymbolSpec, SymbolTable}; +use crate::builtin::Builtin; + +use std::str::FromStr; +use std::collections::HashMap; +use std::rc::Rc; + +mod test; + +pub fn reduce(ast: &ast::AST, symbol_table: &SymbolTable) -> ReducedIR { + let reducer = Reducer::new(symbol_table); + reducer.reduce(ast) +} + +struct Reducer<'a> { + symbol_table: &'a SymbolTable, + functions: HashMap, +} + +impl<'a> Reducer<'a> { + fn new(symbol_table: &'a SymbolTable) -> Self { + Self { + symbol_table, + functions: HashMap::new(), + } + } + + fn reduce(mut self, ast: &ast::AST) -> ReducedIR { + // First reduce all functions + // TODO once this works, maybe rewrite it using the Visitor + for statement in ast.statements.iter() { + self.top_level_statement(&statement); + } + + // Then compute the entrypoint statements (which may reference previously-computed + // functions by ID) + let mut entrypoint = vec![]; + for statement in ast.statements.iter() { + let ast::Statement { id: item_id, kind, .. } = statement; + match &kind { + ast::StatementKind::Expression(expr) => { + entrypoint.push(Statement::Expression(self.expression(&expr))); + }, + ast::StatementKind::Declaration(ast::Declaration::Binding { name, constant, expr, ..}) => { + let symbol = self.symbol_table.lookup_symbol(item_id).unwrap(); + entrypoint.push(Statement::Binding { id: symbol.def_id.clone(), constant: *constant, expr: self.expression(&expr) }); + }, + _ => () + } + } + + ReducedIR { + functions: self.functions, + entrypoint, + } + } + + fn top_level_statement(&mut self, statement: &ast::Statement) { + let ast::Statement { id: item_id, kind, .. } = statement; + match kind { + ast::StatementKind::Expression(_expr) => { + //TODO expressions can in principle contain definitions, but I won't worry + //about it now + () + }, + ast::StatementKind::Declaration(decl) => match decl { + ast::Declaration::FuncDecl(_, statements) => { + self.insert_function_definition(item_id, statements); + }, + _ => () + }, + ast::StatementKind::Import(..) => (), + ast::StatementKind::Module(modspec) => { + //TODO handle modules + () + } + } + } + + fn function_internal_statement(&mut self, statement: &ast::Statement) -> Option { + let ast::Statement { id: item_id, kind, .. } = statement; + match kind { + ast::StatementKind::Expression(expr) => { + Some(Statement::Expression(self.expression(expr))) + }, + ast::StatementKind::Declaration(decl) => match decl { + ast::Declaration::FuncDecl(_, statements) => { + self.insert_function_definition(item_id, statements); + None + }, + _ => None + }, + _ => None + } + } + + fn insert_function_definition(&mut self, item_id: &ast::ItemId, statements: &ast::Block) { + let symbol = self.symbol_table.lookup_symbol(item_id).unwrap(); + let def_id = symbol.def_id.clone(); + let callable = Callable::UserDefined { + body: self.function(statements) + }; + self.functions.insert(def_id, callable); + } + + fn expression(&mut self, expr: &ast::Expression) -> Expression { + use crate::ast::ExpressionKind::*; + use Expression::{Unimplemented}; + + match &expr.kind { + NatLiteral(n) => Expression::Literal(Literal::Nat(*n)), + FloatLiteral(f) => Expression::Literal(Literal::Float(*f)), + StringLiteral(s) => Expression::Literal(Literal::StringLit(s.clone())), + BoolLiteral(b) => Expression::Literal(Literal::Bool(*b)), + BinExp(binop, lhs, rhs) => self.binop(binop, lhs, rhs), + PrefixExp(op, arg) => self.prefix(op, arg), + Value(qualified_name) => self.value(qualified_name), + Call { f, arguments } => Unimplemented, // self.reduce_call_expression(f, arguments), + TupleLiteral(exprs) => Expression::Tuple(exprs.iter().map(|e| self.expression(e)).collect()), + IfExpression { + discriminator, + body, + } => Unimplemented, //self.reduce_if_expression(deref_optional_box(discriminator), body), + Lambda { params, body, .. } => Unimplemented, //self.reduce_lambda(params, body), + NamedStruct { name, fields } => Unimplemented, //self.reduce_named_struct(name, fields), + Index { .. } => Unimplemented, + WhileExpression { .. } => Unimplemented, + ForExpression { .. } => Unimplemented, + ListLiteral { .. } => Unimplemented, + } + } + + fn function(&mut self, statements: &ast::Block) -> Vec { + statements.iter().filter_map(|stmt| self.function_internal_statement(stmt)).collect() + } + + fn prefix(&mut self, prefix: &ast::PrefixOp, arg: &ast::Expression) -> Expression { + let builtin: Option = TryFrom::try_from(prefix).ok(); + match builtin { + Some(op) => { + Expression::Call { + f: Function::Builtin(op), + args: vec![self.expression(arg)], + } + } + None => { + //TODO need this for custom prefix ops + Expression::Unimplemented + } + } + } + + fn binop(&mut self, binop: &ast::BinOp, lhs: &ast::Expression, rhs: &ast::Expression) -> Expression { + use Expression::*; + let operation = Builtin::from_str(binop.sigil()).ok(); + match operation { + Some(Builtin::Assignment) => { + let lval = match &lhs.kind { + ast::ExpressionKind::Value(qualified_name) => { + if let Some(Symbol { def_id, .. }) = self.symbol_table.lookup_symbol(&qualified_name.id) { + def_id.clone() + } else { + return ReductionError(format!("Couldn't look up name: {:?}", qualified_name)); + } + }, + _ => return ReductionError("Trying to assign to a non-name".to_string()), + }; + + Assign { + lval, + rval: Box::new(self.expression(rhs)), + } + }, + Some(op) => { + Expression::Call { + f: Function::Builtin(op), + args: vec![self.expression(lhs), self.expression(rhs)], + } + } + None => { + //TODO handle a user-defined operation + Unimplemented + } + } + } + + fn value(&mut self, qualified_name: &ast::QualifiedName) -> Expression { + let ast::QualifiedName { id, components, .. } = qualified_name; + println!("Qualified name: {:?}", qualified_name); + let maybe_symbol = self.symbol_table.lookup_symbol(&qualified_name.id); + println!("Symbol? {:?}", maybe_symbol); + + Expression::Unimplemented + } +} + +/// The reduced intermediate representation consists of a list of function definitions, and a block +/// of entrypoint statements. In a repl or script context this can be an arbitrary list of +/// statements, in an executable context will likely just be a pointer to the main() function. +#[derive(Debug)] +pub struct ReducedIR { + functions: HashMap, + pub entrypoint: Vec, +} + +impl ReducedIR { + fn debug(&self, symbol_table: &SymbolTable) { + println!("Reduced IR:"); + println!("Functions:"); + println!("-----------"); + for (id, callable) in self.functions.iter() { + let name = &symbol_table.lookup_symbol_by_def(id).unwrap().local_name; + println!("{}({}) -> {:?}", id, name, callable); + } + println!(""); + println!("Entrypoint:"); + println!("-----------"); + for stmt in self.entrypoint.iter() { + println!("{:?}", stmt); + } + println!("-----------"); + } +} + +#[derive(Debug)] +pub enum Callable { + Builtin(Builtin), + UserDefined { + body: Vec + } +} + +#[derive(Debug)] +pub enum Statement { + Expression(Expression), + Binding { + id: DefId, + constant: bool, + expr: Expression + }, +} + +#[derive(Debug)] +pub enum Expression { + Literal(Literal), + Tuple(Vec), + Assign { + lval: DefId, + rval: Box, + }, + Call { + f: Function, + args: Vec + }, + Unimplemented, + ReductionError(String), +} + +#[derive(Debug)] +pub enum Function { + Builtin(Builtin), + Defined(DefId), +} + + +#[derive(Debug)] +pub enum Literal { + Nat(u64), + Int(i64), + Float(f64), + Bool(bool), + StringLit(Rc), +} diff --git a/schala-lang/language/src/reduced_ir/test.rs b/schala-lang/language/src/reduced_ir/test.rs new file mode 100644 index 0000000..7b3c56c --- /dev/null +++ b/schala-lang/language/src/reduced_ir/test.rs @@ -0,0 +1,42 @@ +#![cfg(test)] + +use crate::symbol_table::SymbolTable; +use super::*; + +fn build_ir(input: &str) -> ReducedIR { + let ast = crate::util::quick_ast(input); + + let mut symbol_table = SymbolTable::new(); + symbol_table.process_ast(&ast).unwrap(); + + let reduced = reduce(&ast, &symbol_table); + reduced.debug(&symbol_table); + reduced +} + + +#[test] +fn test_ir() { + let src = r#" + + let global_one = 10 + 20 + let global_two = "the string hello" + + fn a_function(i, j, k) { + fn nested(x) { + x + 10 + } + i + j * nested(k) + } + + fn another_function(e) { + let local_var = 420 + e * local_var + } + + another_function() + "#; + + let reduced = build_ir(src); + assert!(1 == 2); +} diff --git a/schala-lang/language/src/schala.rs b/schala-lang/language/src/schala.rs index b80b42e..5104add 100644 --- a/schala-lang/language/src/schala.rs +++ b/schala-lang/language/src/schala.rs @@ -1,14 +1,14 @@ use stopwatch::Stopwatch; use crate::error::SchalaError; -use crate::{eval, parsing, reduced_ast, symbol_table, tokenizing, typechecking}; +use crate::{eval, parsing, reduced_ast, reduced_ir, tree_walk_eval, symbol_table, tokenizing, typechecking}; use schala_repl::{ ComputationRequest, ComputationResponse, GlobalOutputStats, LangMetaRequest, LangMetaResponse, ProgrammingLanguageInterface, }; /// All the state necessary to parse and execute a Schala program are stored in this struct. -pub struct Schala { +pub struct Schala<'a> { /// Holds a reference to the original source code, parsed into line and character source_reference: SourceReference, /// Execution state for AST-walking interpreter @@ -19,8 +19,12 @@ pub struct Schala { type_context: typechecking::TypeContext<'static>, /// Schala Parser active_parser: parsing::Parser, + + eval_state: tree_walk_eval::State<'a>, + } +/* impl Schala { //TODO implement documentation for language items /* @@ -31,23 +35,25 @@ impl Schala { } */ } +*/ -impl Schala { +impl<'a> Schala<'a> { /// Creates a new Schala environment *without* any prelude. - fn new_blank_env() -> Schala { + fn new_blank_env() -> Schala<'a> { Schala { source_reference: SourceReference::new(), symbol_table: symbol_table::SymbolTable::new(), state: eval::State::new(), type_context: typechecking::TypeContext::new(), active_parser: parsing::Parser::new(), + eval_state: tree_walk_eval::State::new(), } } /// Creates a new Schala environment with the standard prelude, which is defined as ordinary /// Schala code in the file `prelude.schala` #[allow(clippy::new_without_default)] - pub fn new() -> Schala { + pub fn new() -> Schala<'a> { let prelude = include_str!("../source-files/prelude.schala"); let mut env = Schala::new_blank_env(); @@ -88,6 +94,22 @@ impl Schala { .typecheck(&ast) .map_err(SchalaError::from_type_error); + let reduced_ir = reduced_ir::reduce(&ast, &self.symbol_table); + println!("Reduced IR: {:?}", reduced_ir); + + let evaluation_outputs = self.eval_state.evaluate(reduced_ir, true); + let text_output: Result, String> = evaluation_outputs.into_iter().collect(); + + let text_output: Result, SchalaError> = + text_output.map_err(|err| SchalaError::from_string(err, Stage::Evaluation)); + + let eval_output: String = + text_output.map(|v| Iterator::intersperse(v.into_iter(), "\n".to_owned()).collect())?; + + Ok(eval_output) + + /* + // Reduce AST - TODO this doesn't produce an error yet, but probably should let reduced_ast = reduced_ast::reduce(&ast, &self.symbol_table); @@ -102,6 +124,7 @@ impl Schala { text_output.map(|v| Iterator::intersperse(v.into_iter(), "\n".to_owned()).collect())?; Ok(eval_output) + */ } } @@ -151,7 +174,7 @@ fn stage_names() -> Vec<&'static str> { ] } -impl ProgrammingLanguageInterface for Schala { +impl<'a> ProgrammingLanguageInterface for Schala<'a> { //TODO flesh out Config type Config = (); fn language_name() -> String { diff --git a/schala-lang/language/src/symbol_table/mod.rs b/schala-lang/language/src/symbol_table/mod.rs index 77ecc7b..6765020 100644 --- a/schala-lang/language/src/symbol_table/mod.rs +++ b/schala-lang/language/src/symbol_table/mod.rs @@ -15,6 +15,35 @@ mod symbol_trie; use symbol_trie::SymbolTrie; mod test; + +/// ID used for definitions +#[derive(Debug, PartialEq, Eq, Hash, Clone, Default)] +pub struct DefId { + idx: u32, +} + +impl fmt::Display for DefId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "DefId:{}", self.idx) + } +} + +pub struct DefIdStore { + last_idx: u32 +} + +impl DefIdStore { + pub fn new() -> Self { + Self { last_idx: 0 } + } + + pub fn fresh(&mut self) -> DefId { + let idx = self.last_idx; + self.last_idx += 1; + DefId { idx } + } +} + /// Fully-qualified symbol name #[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)] pub struct Fqsn { @@ -113,6 +142,9 @@ impl NameTable { //cf. p. 150 or so of Language Implementation Patterns pub struct SymbolTable { + + def_id_store: DefIdStore, + /// Used for import resolution. symbol_trie: SymbolTrie, @@ -131,6 +163,7 @@ pub struct SymbolTable { impl SymbolTable { pub fn new() -> SymbolTable { SymbolTable { + def_id_store: DefIdStore::new(), symbol_trie: SymbolTrie::new(), fq_names: NameTable::new(), types: NameTable::new(), @@ -148,13 +181,18 @@ impl SymbolTable { if !errs.is_empty() { return Err(errs); } - self.resolve_symbol_ids(ast); + self.resolve_scopes(ast); Ok(()) } pub fn lookup_symbol(&self, id: &ItemId) -> Option<&Symbol> { self.id_to_symbol.get(id).map(|s| s.as_ref()) } + + pub fn lookup_symbol_by_def(&self, def: &DefId) -> Option<&Symbol> { + self.id_to_symbol.iter().find(|(_, sym)| sym.def_id == *def) + .map(|(_, sym)| sym.as_ref()) + } } #[allow(dead_code)] @@ -163,6 +201,7 @@ pub struct Symbol { pub local_name: Rc, fully_qualified_name: Fqsn, pub spec: SymbolSpec, + pub def_id: DefId, } impl fmt::Display for Symbol { @@ -224,6 +263,7 @@ impl SymbolTable { local_name: fqsn.local_name(), fully_qualified_name: fqsn.clone(), spec, + def_id: self.def_id_store.fresh(), }); self.symbol_trie.insert(&fqsn); self.fqsn_to_symbol.insert(fqsn, symbol.clone()); @@ -232,8 +272,8 @@ impl SymbolTable { /// Walks the AST, matching the ID of an identifier used in some expression to /// the corresponding Symbol. - fn resolve_symbol_ids(&mut self, ast: &ast::AST) { - let mut resolver = resolver::Resolver::new(self); + fn resolve_scopes(&mut self, ast: &ast::AST) { + let mut resolver = resolver::ScopeResolver::new(self); resolver.resolve(ast); } @@ -367,6 +407,7 @@ impl SymbolTable { kind: NameKind::Binding, }, )?; + println!("Adding Binding symbol: {:?}", fq_binding); self.add_symbol( id, fq_binding, diff --git a/schala-lang/language/src/symbol_table/resolver.rs b/schala-lang/language/src/symbol_table/resolver.rs index ae27f35..d7d8167 100644 --- a/schala-lang/language/src/symbol_table/resolver.rs +++ b/schala-lang/language/src/symbol_table/resolver.rs @@ -6,12 +6,12 @@ use crate::util::ScopeStack; type FqsnPrefix = Vec; -pub struct Resolver<'a> { +pub struct ScopeResolver<'a> { symbol_table: &'a mut super::SymbolTable, name_scope_stack: ScopeStack<'a, Rc, FqsnPrefix>, } -impl<'a> Resolver<'a> { +impl<'a> ScopeResolver<'a> { pub fn new(symbol_table: &'a mut SymbolTable) -> Self { let name_scope_stack: ScopeStack<'a, Rc, FqsnPrefix> = ScopeStack::new(None); Self { @@ -47,7 +47,9 @@ impl<'a> Resolver<'a> { } fn qualified_name(&mut self, name: &QualifiedName) { + println!("Handling qualified_name in resolver.rs: {:?}", name); let fqsn = self.lookup_name_in_scope(name); + println!("Computed FQSN: {:?}", fqsn); let symbol = self.symbol_table.fqsn_to_symbol.get(&fqsn); if let Some(symbol) = symbol { self.symbol_table.id_to_symbol.insert(name.id.clone(), symbol.clone()); @@ -55,7 +57,7 @@ impl<'a> Resolver<'a> { } } -impl<'a> ASTVisitor for Resolver<'a> { +impl<'a> ASTVisitor for ScopeResolver<'a> { //TODO need to un-insert these - maybe need to rethink visitor fn import(&mut self, import_spec: &ImportSpecifier) { let ImportSpecifier { diff --git a/schala-lang/language/src/tree_walk_eval/mod.rs b/schala-lang/language/src/tree_walk_eval/mod.rs new file mode 100644 index 0000000..92c22b9 --- /dev/null +++ b/schala-lang/language/src/tree_walk_eval/mod.rs @@ -0,0 +1,144 @@ +use crate::reduced_ir::{ReducedIR, Expression, Function, Statement, Literal}; +use crate::symbol_table::{DefId}; +use crate::util::ScopeStack; + +use std::fmt::Write; +use std::convert::From; + +type EvalResult = Result; + +#[derive(Debug)] +pub struct State<'a> { + environments: ScopeStack<'a, DefId, RuntimeValue>, +} + +#[derive(Debug)] +struct RuntimeError { + msg: String +} + +impl From for RuntimeError { + fn from(msg: String) -> Self { + Self { + msg + } + } +} + +impl RuntimeError { + fn get_msg(&self) -> String { + format!("Runtime error: {}", self.msg) + } +} + +fn paren_wrapped(terms: impl Iterator) -> String { + let mut buf = String::new(); + write!(buf, "(").unwrap(); + for term in terms.map(Some).intersperse(None) { + match term { + Some(e) => write!(buf, "{}", e).unwrap(), + None => write!(buf, ", ").unwrap(), + }; + } + write!(buf, ")").unwrap(); + buf +} + +#[derive(Debug)] +enum RuntimeValue { + Expression(Expression), +} + +impl From for RuntimeValue { + fn from(ex: Expression) -> Self { + Self::Expression(ex) + } +} + +fn expr_to_repl(expr: &Expression) -> String { + match expr { + Expression::Unimplemented => format!("Expression {:?} not implemented", expr), + Expression::Literal(lit) => match lit { + Literal::Nat(n) => format!("{}", n), + Literal::Int(i) => format!("{}", i), + Literal::Float(f) => format!("{}", f), + Literal::Bool(b) => format!("{}", b), + Literal::StringLit(s) => format!("\"{}\"", s), + } + Expression::Tuple(terms) => paren_wrapped(terms.iter().map(|x| expr_to_repl(x))), + Expression::Assign { lval, rval } => { + "".to_string() + }, + e => format!("Expression {:?} shouldn't be here", e), + } +} + +impl RuntimeValue { + fn to_repl(&self) -> String { + match self { + RuntimeValue::Expression(ref expr) => expr_to_repl(expr) + } + } +} + +impl<'a> State<'a> { + pub fn new() -> Self { + Self { + environments: ScopeStack::new(Some("global".to_string())) + } + } + + pub fn evaluate(&mut self, reduced: ReducedIR, repl: bool) -> Vec> { + let mut acc = vec![]; + + for statement in reduced.entrypoint.into_iter() { + match self.statement(statement) { + Ok(Some(output)) if repl => { + acc.push(Ok(output.to_repl())) + }, + Ok(_) => (), + Err(error) => { + acc.push(Err(error.into())); + return acc; + } + } + } + acc + } + + fn statement(&mut self, stmt: Statement) -> EvalResult> { + match stmt { + Statement::Binding { id, expr, constant } => { + println!("eval() binding id: {}", id); + let evaluated = self.expression(expr)?; + self.environments.insert(id, evaluated.into()); + Ok(None) + }, + Statement::Expression(expr) => { + let evaluated = self.expression(expr)?; + Ok(Some(evaluated.into())) + } + } + } + + fn expression(&mut self, expression: Expression) -> EvalResult { + use Expression::Unimplemented; + Ok(match expression { + lit @ Expression::Literal(_) => lit, + Expression::Tuple(items) => Expression::Tuple(items.into_iter().map(|expr| self.expression(expr)).collect::>>()?), + Expression::Assign { lval, box rval } => { + let mut env = self.environments.lookup(&lval); + Unimplemented + }, + Expression::Call { f, args } => self.call_expression(f, args)?, + Unimplemented => Unimplemented, + Expression::ReductionError(e) => return Err(e.into()), + _ => Expression::Literal(Literal::Nat(69420)), + }) + } + + fn call_expression(&mut self, f: Function, args: Vec) -> EvalResult { + Err("Call expression not implemented".to_string().into()) + } +} +