use std::collections::{hash_map::Entry, HashMap};
use std::fmt;
use std::rc::Rc;

use crate::ast;
use crate::ast::{
    Declaration, ItemId, ModuleSpecifier, Statement, StatementKind, TypeBody, TypeSingletonName,
    Variant, VariantKind,
};
use crate::tokenizing::Location;
use crate::typechecking::TypeId;

mod resolver;
mod symbol_trie;
use symbol_trie::SymbolTrie;
mod test;
use crate::identifier::{Id, IdStore, define_id_kind};

define_id_kind!(DefItem);
pub type DefId = Id<DefItem>;

/// Fully-qualified symbol name
///
/// Stored as an ordered list of scope segments; the final segment is the
/// symbol's own (local) name.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct Fqsn {
    //TODO Fqsn's need to be cheaply cloneable
    scopes: Vec<Scope>, //TODO rename to ScopeSegment
}

impl Fqsn {
    /// Builds the name obtained by appending `new_name` to a copy of the
    /// enclosing `scopes`.
    fn from_scope_stack(scopes: &[Scope], new_name: Rc<String>) -> Self {
        let mut segments = scopes.to_vec();
        segments.push(Scope::Name(new_name));
        Fqsn { scopes: segments }
    }

    /// Test helper: builds a name with one segment per string, in order.
    #[cfg(test)]
    fn from_strs(strs: &[&str]) -> Fqsn {
        let scopes = strs
            .iter()
            .map(|s| Scope::Name(Rc::new(s.to_string())))
            .collect();
        Fqsn { scopes }
    }

    /// Returns the last segment of the name.
    ///
    /// # Panics
    ///
    /// Panics if the name has no segments. `from_scope_stack` always produces
    /// at least one segment, so this can only happen for an empty
    /// `from_strs(&[])`.
    fn local_name(&self) -> Rc<String> {
        let Scope::Name(name) = self
            .scopes
            .last()
            .expect("an Fqsn must contain at least one scope segment");
        name.clone()
    }
}

impl fmt::Display for Fqsn {
    /// Writes the name as `FQSN<a::b::c>`. A name without segments is written
    /// as `FQSN<>` rather than panicking.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let delim = "::";
        write!(f, "FQSN<")?;
        for (index, item) in self.scopes.iter().enumerate() {
            if index > 0 {
                write!(f, "{}", delim)?;
            }
            write!(f, "{}", item)?;
        }
        write!(f, ">")
    }
}

//TODO eventually this should use ItemId's to avoid String-cloning

/// One segment within a scope.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)] enum Scope { Name(Rc), } impl fmt::Display for Scope { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let Scope::Name(name) = self; write!(f, "{}", name) } } #[allow(dead_code)] #[derive(Debug, Clone)] pub enum SymbolError { DuplicateName { prev_name: Fqsn, location: Location, }, DuplicateRecord { type_name: Fqsn, location: Location, member: String, }, } #[allow(dead_code)] #[derive(Debug)] struct NameSpec { location: Location, kind: K, } #[derive(Debug)] enum NameKind { Module, Function, Binding, } #[derive(Debug)] struct TypeKind; /// Keeps track of what names were used in a given namespace. struct NameTable { table: HashMap>, } impl NameTable { fn new() -> Self { Self { table: HashMap::new(), } } fn register(&mut self, name: Fqsn, spec: NameSpec) -> Result<(), SymbolError> { match self.table.entry(name.clone()) { Entry::Occupied(o) => Err(SymbolError::DuplicateName { prev_name: name, location: o.get().location, }), Entry::Vacant(v) => { v.insert(spec); Ok(()) } } } } //cf. p. 150 or so of Language Implementation Patterns pub struct SymbolTable { def_id_store: IdStore, /// Used for import resolution. symbol_trie: SymbolTrie, /// These tables are responsible for preventing duplicate names. fq_names: NameTable, //Note that presence of two tables implies that a type and other binding with the same name can co-exist types: NameTable, /// A map of the Fqsn of an AST definition to a Symbol data structure, which contains /// some basic information about what that symbol is and (ideally) references to other tables /// (e.g. typechecking tables) with more information about that symbol. 
fqsn_to_symbol: HashMap>, id_to_symbol: HashMap>, } impl SymbolTable { pub fn new() -> SymbolTable { SymbolTable { def_id_store: IdStore::new(), symbol_trie: SymbolTrie::new(), fq_names: NameTable::new(), types: NameTable::new(), fqsn_to_symbol: HashMap::new(), id_to_symbol: HashMap::new(), } } /// The main entry point into the symbol table. This will traverse the AST in several /// different ways and populate subtables with information that will be used further in the /// compilation process. pub fn process_ast(&mut self, ast: &ast::AST) -> Result<(), Vec> { let errs = self.populate_name_tables(ast); if !errs.is_empty() { return Err(errs); } self.resolve_scopes(ast); Ok(()) } pub fn lookup_symbol(&self, id: &ItemId) -> Option<&Symbol> { self.id_to_symbol.get(id).map(|s| s.as_ref()) } //TODO optimize this pub fn lookup_symbol_by_def(&self, def: &DefId) -> Option<&Symbol> { self.id_to_symbol.iter().find(|(_, sym)| sym.def_id == *def) .map(|(_, sym)| sym.as_ref()) } #[allow(dead_code)] pub fn debug(&self) { println!("Symbol table:"); println!("----------------"); for (id, sym) in self.id_to_symbol.iter() { println!("{} => {}", id, sym); } } } #[allow(dead_code)] #[derive(Debug, Clone)] pub struct Symbol { fully_qualified_name: Fqsn, spec: SymbolSpec, def_id: DefId, } impl Symbol { pub fn local_name(&self) -> Rc { self.fully_qualified_name.local_name() } pub fn def_id(&self) -> Option { Some(self.def_id.clone()) } pub fn spec(&self) -> SymbolSpec { self.spec.clone() } } impl fmt::Display for Symbol { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "", self.local_name(), self.fully_qualified_name, self.spec) } } //TODO - I think I eventually want to draw a distinction between true global items //i.e. global vars, and items whose definitions are scoped. Right now there's a sense //in which Func, DataConstructor, RecordConstructor, and GlobalBinding are "globals", //whereas LocalVarible and FunctionParam have local scope. 
But right now, they all //get put into a common table, and all get DefId's from a common source. // //It would be good if individual functions could in parallel look up their own //local vars without interfering with other lookups. Also some type definitions //should be scoped in a similar way. // //Also it makes sense that non-globals should not use DefId's, particularly not //function parameters (even though they are currently assigned). #[derive(Debug, Clone)] pub enum SymbolSpec { Func, DataConstructor { tag: u32, arity: usize, type_id: TypeId, }, RecordConstructor { tag: u32, members: HashMap, TypeId>, type_id: TypeId, }, GlobalBinding, //Only for global variables, not for function-local ones or ones within a `let` scope context LocalVariable, FunctionParam(u8), } impl fmt::Display for SymbolSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use self::SymbolSpec::*; match self { Func => write!(f, "Func"), DataConstructor { tag, type_id, arity, } => write!( f, "DataConstructor(tag: {}, arity: {}, type: {})", tag, arity, type_id ), RecordConstructor { type_id, tag, .. } => write!( f, "RecordConstructor(tag: {})( -> {})", tag, type_id ), GlobalBinding => write!(f, "GlobalBinding"), LocalVariable => write!(f, "Local variable"), FunctionParam(n) => write!(f, "Function param: {}", n), } } } impl SymbolTable { /* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem * later */ /// Register a new mapping of a fully-qualified symbol name (e.g. `Option::Some`) /// to a Symbol, a descriptor of what that name refers to. 
fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) { let def_id = self.def_id_store.fresh(); let symbol = Rc::new(Symbol { fully_qualified_name: fqsn.clone(), spec, def_id, }); println!("In add_symbol(), adding: {:?}", symbol); self.symbol_trie.insert(&fqsn); self.fqsn_to_symbol.insert(fqsn, symbol.clone()); self.id_to_symbol.insert(id.clone(), symbol); } /// Walks the AST, matching the ID of an identifier used in some expression to /// the corresponding Symbol. fn resolve_scopes(&mut self, ast: &ast::AST) { let mut resolver = resolver::ScopeResolver::new(self); resolver.resolve(ast); } /// This function traverses the AST and adds symbol table entries for /// constants, functions, types, and modules defined within. This simultaneously /// checks for dupicate definitions (and returns errors if discovered), and sets /// up name tables that will be used by further parts of the compiler fn populate_name_tables(&mut self, ast: &ast::AST) -> Vec { let mut scope_stack = vec![]; self.add_from_scope(ast.statements.as_ref(), &mut scope_stack, false) } fn add_from_scope<'a>( &'a mut self, statements: &[Statement], scope_stack: &mut Vec, function_scope: bool, ) -> Vec { let mut errors = vec![]; for statement in statements { let Statement { id, kind, location, } = statement; //TODO I'm not sure if I need to do anything with this ID let location = *location; if let Err(err) = self.add_single_statement(id, kind, location, scope_stack, function_scope) { errors.push(err); } else { // If there's an error with a name, don't recurse into subscopes of that name let recursive_errs = match kind { StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => { let new_scope = Scope::Name(signature.name.clone()); scope_stack.push(new_scope); let output = self.add_from_scope(body.as_ref(), scope_stack, true); scope_stack.pop(); output } StatementKind::Module(ModuleSpecifier { name, contents }) => { let new_scope = Scope::Name(name.clone()); 
scope_stack.push(new_scope); let output = self.add_from_scope(contents.as_ref(), scope_stack, false); scope_stack.pop(); output } StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable, }) => self.add_type_members(name, body, mutable, location, scope_stack), _ => vec![], }; errors.extend(recursive_errs.into_iter()); } } errors } fn add_single_statement( &mut self, id: &ItemId, kind: &StatementKind, location: Location, scope_stack: &[Scope], function_scope: bool, ) -> Result<(), SymbolError> { match kind { StatementKind::Declaration(Declaration::FuncSig(signature)) => { let fq_function = Fqsn::from_scope_stack(scope_stack, signature.name.clone()); self.fq_names.register( fq_function.clone(), NameSpec { location, kind: NameKind::Function, }, )?; self.types.register( fq_function.clone(), NameSpec { location, kind: TypeKind, }, )?; self.add_symbol( id, fq_function, SymbolSpec::Func, ); } StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => { let fn_name = &signature.name; let fq_function = Fqsn::from_scope_stack(scope_stack, fn_name.clone()); self.fq_names.register( fq_function.clone(), NameSpec { location, kind: NameKind::Function, }, )?; self.types.register( fq_function.clone(), NameSpec { location, kind: TypeKind, }, )?; self.add_symbol( id, fq_function, SymbolSpec::Func, ); } StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => { let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone()); self.types.register( fq_type, NameSpec { location, kind: TypeKind, }, )?; } StatementKind::Declaration(Declaration::Binding { name, .. }) => { let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone()); self.fq_names.register( fq_binding.clone(), NameSpec { location, kind: NameKind::Binding, }, )?; if !function_scope { self.add_symbol( id, fq_binding, SymbolSpec::GlobalBinding, ); } } StatementKind::Module(ModuleSpecifier { name, .. 
}) => { let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone()); self.fq_names.register( fq_module, NameSpec { location, kind: NameKind::Module, }, )?; } _ => (), } Ok(()) } fn add_type_members( &mut self, type_name: &TypeSingletonName, type_body: &TypeBody, _mutable: &bool, location: Location, scope_stack: &mut Vec, ) -> Vec { let mut member_errors = vec![]; let mut errors = vec![]; let mut register = |id: &ItemId, fqsn: Fqsn, spec: SymbolSpec| { let name_spec = NameSpec { location, kind: TypeKind, }; if let Err(err) = self.types.register(fqsn.clone(), name_spec) { errors.push(err); } else { self.add_symbol(id, fqsn, spec); }; }; let TypeBody(variants) = type_body; let new_scope = Scope::Name(type_name.name.clone()); scope_stack.push(new_scope); for (index, variant) in variants.iter().enumerate() { let tag = index as u32; let Variant { name, kind, id } = variant; let type_id = TypeId::lookup_name(name.as_ref()); match kind { VariantKind::UnitStruct => { let fq_name = Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()); let spec = SymbolSpec::DataConstructor { tag, arity: 0, type_id, }; register(id, fq_name, spec); } VariantKind::TupleStruct(items) => { let fq_name = Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()); let spec = SymbolSpec::DataConstructor { tag, arity: items.len(), type_id, }; register(id, fq_name, spec); } VariantKind::Record(members) => { let fq_name = Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()); let mut seen_members = HashMap::new(); for (member_name, _) in members.iter() { match seen_members.entry(member_name.as_ref()) { Entry::Occupied(o) => { let location = *o.get(); member_errors.push(SymbolError::DuplicateRecord { type_name: fq_name.clone(), location, member: member_name.as_ref().to_string(), }); } //TODO eventually this should track meaningful locations Entry::Vacant(v) => { v.insert(Location::default()); } } } let spec = SymbolSpec::RecordConstructor { tag, type_id, members: members .iter() 
.map(|(member_name, _type_identifier)| { ( member_name.clone(), TypeId::lookup_name("DUMMY_TYPE_ID"), ) }) .collect(), }; register(id, fq_name, spec); } } } scope_stack.pop(); errors.extend(member_errors.into_iter()); errors } }