#![allow(clippy::enum_variant_names)] use std::{ collections::{hash_map::Entry, HashMap, HashSet}, fmt, rc::Rc, str::FromStr, }; use crate::{ ast, ast::{ Declaration, Expression, ExpressionKind, ItemId, Statement, StatementKind, TypeBody, TypeSingletonName, Variant, VariantKind, }, builtin::Builtin, tokenizing::Location, type_inference::{self, PendingType, TypeBuilder, TypeContext, TypeId, VariantBuilder}, }; mod fqsn; pub use fqsn::{Fqsn, ScopeSegment}; mod resolver; mod symbol_trie; use symbol_trie::SymbolTrie; mod test; use crate::identifier::{define_id_kind, Id, IdStore}; define_id_kind!(DefItem); pub type DefId = Id; #[allow(dead_code)] #[derive(Debug, Clone)] pub enum SymbolError { DuplicateName { prev_name: Fqsn, location: Location }, DuplicateVariant { type_fqsn: Fqsn, name: String }, DuplicateRecord { type_name: Fqsn, location: Location, member: String }, UnknownAnnotation { name: String }, BadAnnotation { name: String, msg: String }, } #[allow(dead_code)] #[derive(Debug)] struct NameSpec { location: Location, kind: K, } #[derive(Debug)] enum NameKind { Module, Function, Binding, } #[derive(Debug)] struct TypeKind; /// Keeps track of what names were used in a given namespace. struct NameTable { table: HashMap>, } impl NameTable { fn new() -> Self { Self { table: HashMap::new() } } fn register(&mut self, name: Fqsn, spec: NameSpec) -> Result<(), SymbolError> { match self.table.entry(name.clone()) { Entry::Occupied(o) => Err(SymbolError::DuplicateName { prev_name: name, location: o.get().location }), Entry::Vacant(v) => { v.insert(spec); Ok(()) } } } } //cf. p. 150 or so of Language Implementation Patterns pub struct SymbolTable { def_id_store: IdStore, /// Used for import resolution. symbol_trie: SymbolTrie, /// These tables are responsible for preventing duplicate names. fq_names: NameTable, //Note that presence of two tables implies that a type and other binding with the same name can co-exist types: NameTable, id_to_def: HashMap, def_to_symbol: HashMap>, } impl SymbolTable { pub fn new() -> SymbolTable { let table = SymbolTable { def_id_store: IdStore::new(), symbol_trie: SymbolTrie::new(), fq_names: NameTable::new(), types: NameTable::new(), id_to_def: HashMap::new(), def_to_symbol: HashMap::new(), }; table } /// The main entry point into the symbol table. This will traverse the AST in several /// different ways and populate subtables with information that will be used further in the /// compilation process. pub fn process_ast( &mut self, ast: &ast::AST, type_context: &mut TypeContext, ) -> Result<(), Vec> { let mut runner = SymbolTableRunner { type_context, table: self }; let errs = runner.populate_name_tables(ast); if !errs.is_empty() { return Err(errs); } runner.resolve_scopes(ast); Ok(()) } pub fn lookup_symbol(&self, id: &ItemId) -> Option<&Symbol> { let def = self.id_to_def.get(id)?; self.def_to_symbol.get(def).map(|s| s.as_ref()) } pub fn lookup_symbol_by_def(&self, def: &DefId) -> Option<&Symbol> { self.def_to_symbol.get(def).map(|s| s.as_ref()) } #[allow(dead_code)] pub fn debug(&self) { println!("Symbol table:"); println!("----------------"); for (id, def) in self.id_to_def.iter() { if let Some(symbol) = self.def_to_symbol.get(def) { println!("{} => {}: {}", id, def, symbol); } else { println!("{} => {} ", id, def); } } } /// Register a new mapping of a fully-qualified symbol name (e.g. `Option::Some`) /// to a Symbol, a descriptor of what that name refers to. fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) { let def_id = self.def_id_store.fresh(); let symbol = Rc::new(Symbol { fully_qualified_name: fqsn.clone(), spec, def_id }); self.symbol_trie.insert(&fqsn, def_id); self.id_to_def.insert(*id, def_id); self.def_to_symbol.insert(def_id, symbol); } fn populate_single_builtin(&mut self, fqsn: Fqsn, builtin: Builtin) { let def_id = self.def_id_store.fresh(); let spec = SymbolSpec::Builtin(builtin); let symbol = Rc::new(Symbol { fully_qualified_name: fqsn.clone(), spec, def_id }); self.symbol_trie.insert(&fqsn, def_id); self.def_to_symbol.insert(def_id, symbol); } } struct SymbolTableRunner<'a> { type_context: &'a mut TypeContext, table: &'a mut SymbolTable, } #[allow(dead_code)] #[derive(Debug, Clone)] pub struct Symbol { fully_qualified_name: Fqsn, spec: SymbolSpec, def_id: DefId, } impl Symbol { pub fn local_name(&self) -> Rc { self.fully_qualified_name.last_elem() } pub fn def_id(&self) -> Option { Some(self.def_id) } pub fn spec(&self) -> SymbolSpec { self.spec.clone() } } impl fmt::Display for Symbol { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "", self.local_name(), self.fully_qualified_name, self.spec) } } //TODO - I think I eventually want to draw a distinction between true global items //i.e. global vars, and items whose definitions are scoped. Right now there's a sense //in which Func, DataConstructor, RecordConstructor, and GlobalBinding are "globals", //whereas LocalVarible and FunctionParam have local scope. But right now, they all //get put into a common table, and all get DefId's from a common source. // //It would be good if individual functions could in parallel look up their own //local vars without interfering with other lookups. Also some type definitions //should be scoped in a similar way. // //Also it makes sense that non-globals should not use DefId's, particularly not //function parameters (even though they are currently assigned). #[derive(Debug, Clone)] pub enum SymbolSpec { Builtin(Builtin), Func, DataConstructor { tag: u32, type_id: TypeId }, RecordConstructor { tag: u32, type_id: TypeId }, GlobalBinding, //Only for global variables, not for function-local ones or ones within a `let` scope context LocalVariable, FunctionParam(u8), } impl fmt::Display for SymbolSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use self::SymbolSpec::*; match self { Builtin(b) => write!(f, "Builtin: {:?}", b), Func => write!(f, "Func"), DataConstructor { tag, type_id } => write!(f, "DataConstructor(tag: {}, type: {})", tag, type_id), RecordConstructor { type_id, tag, .. } => write!(f, "RecordConstructor(tag: {})( -> {})", tag, type_id), GlobalBinding => write!(f, "GlobalBinding"), LocalVariable => write!(f, "Local variable"), FunctionParam(n) => write!(f, "Function param: {}", n), } } } impl<'a> SymbolTableRunner<'a> { /* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem * later */ fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) { self.table.add_symbol(id, fqsn, spec) } /// Walks the AST, matching the ID of an identifier used in some expression to /// the corresponding Symbol. fn resolve_scopes(&mut self, ast: &ast::AST) { let mut resolver = resolver::ScopeResolver::new(self.table); resolver.resolve(ast); } /// This function traverses the AST and adds symbol table entries for /// constants, functions, types, and modules defined within. This simultaneously /// checks for dupicate definitions (and returns errors if discovered), and sets /// up name tables that will be used by further parts of the compiler fn populate_name_tables(&mut self, ast: &ast::AST) -> Vec { let mut scope_stack = vec![]; self.add_from_scope(ast.statements.as_ref(), &mut scope_stack, false) } fn add_from_scope( &mut self, statements: &[Statement], scope_stack: &mut Vec, function_scope: bool, ) -> Vec { let mut errors = vec![]; for statement in statements { let Statement { id, kind, location } = statement; let location = *location; if let Err(err) = self.add_single_statement(id, kind, location, scope_stack, function_scope) { errors.push(err); } else { // If there's an error with a name, don't recurse into subscopes of that name let recursive_errs = match kind { StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => { let new_scope = ScopeSegment::Name(signature.name.clone()); scope_stack.push(new_scope); let output = self.add_from_scope(body.as_ref(), scope_stack, true); scope_stack.pop(); output } StatementKind::Declaration(Declaration::Module { name, items }) => { let new_scope = ScopeSegment::Name(name.clone()); scope_stack.push(new_scope); let output = self.add_from_scope(items.as_ref(), scope_stack, false); scope_stack.pop(); output } StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) => self.add_type_members(name, body, mutable, location, scope_stack), _ => vec![], }; errors.extend(recursive_errs.into_iter()); } } errors } fn add_single_statement( &mut self, id: &ItemId, kind: &StatementKind, location: Location, scope_stack: &[ScopeSegment], function_scope: bool, ) -> Result<(), SymbolError> { match kind { StatementKind::Declaration(Declaration::FuncSig(signature)) => { let fq_function = Fqsn::from_scope_stack(scope_stack, signature.name.clone()); self.table .fq_names .register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?; self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?; self.add_symbol(id, fq_function, SymbolSpec::Func); } StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => { let fn_name = &signature.name; let fq_function = Fqsn::from_scope_stack(scope_stack, fn_name.clone()); self.table .fq_names .register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?; self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?; self.add_symbol(id, fq_function, SymbolSpec::Func); } StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => { let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone()); self.table.types.register(fq_type, NameSpec { location, kind: TypeKind })?; } StatementKind::Declaration(Declaration::Binding { name, .. }) => { let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone()); self.table .fq_names .register(fq_binding.clone(), NameSpec { location, kind: NameKind::Binding })?; if !function_scope { self.add_symbol(id, fq_binding, SymbolSpec::GlobalBinding); } } StatementKind::Declaration(Declaration::Module { name, .. }) => { let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone()); self.table.fq_names.register(fq_module, NameSpec { location, kind: NameKind::Module })?; } StatementKind::Declaration(Declaration::Annotation { name, arguments, inner }) => { let inner = inner.as_ref(); self.add_single_statement( &inner.id, &inner.kind, inner.location, scope_stack, function_scope, )?; self.process_annotation(name.as_ref(), arguments.as_slice(), scope_stack, inner)?; } _ => (), } Ok(()) } fn process_annotation( &mut self, name: &str, arguments: &[Expression], scope_stack: &[ScopeSegment], inner: &Statement, ) -> Result<(), SymbolError> { println!("handling annotation: {}", name); if name == "register_builtin" { if let Statement { id: _, location: _, kind: StatementKind::Declaration(Declaration::FuncDecl(sig, _)), } = inner { let fqsn = Fqsn::from_scope_stack(scope_stack, sig.name.clone()); let builtin_name = match arguments { [Expression { kind: ExpressionKind::Value(qname), .. }] if qname.components.len() == 1 => qname.components[0].clone(), _ => return Err(SymbolError::BadAnnotation { name: name.to_string(), msg: "Bad argument for register_builtin".to_string(), }), }; let builtin = Builtin::from_str(builtin_name.as_str()).map_err(|_| SymbolError::BadAnnotation { name: name.to_string(), msg: format!("Invalid builtin: {}", builtin_name), })?; self.table.populate_single_builtin(fqsn, builtin); Ok(()) } else { Err(SymbolError::BadAnnotation { name: name.to_string(), msg: "register_builtin not annotating a function".to_string(), }) } } else { Err(SymbolError::UnknownAnnotation { name: name.to_string() }) } } fn add_type_members( &mut self, type_name: &TypeSingletonName, type_body: &TypeBody, _mutable: &bool, location: Location, scope_stack: &mut Vec, ) -> Vec { let (variants, immediate_variant) = match type_body { TypeBody::Variants(variants) => (variants.clone(), false), TypeBody::ImmediateRecord(id, fields) => ( vec![Variant { id: *id, name: type_name.name.clone(), kind: VariantKind::Record(fields.clone()), }], true, ), }; let type_fqsn = Fqsn::from_scope_stack(scope_stack, type_name.name.clone()); let new_scope = ScopeSegment::Name(type_name.name.clone()); scope_stack.push(new_scope); // Check for duplicates before registering any types with the TypeContext let mut seen_variants = HashSet::new(); let mut errors = vec![]; for variant in variants.iter() { if seen_variants.contains(&variant.name) { errors.push(SymbolError::DuplicateVariant { type_fqsn: type_fqsn.clone(), name: variant.name.as_ref().to_string(), }) } seen_variants.insert(variant.name.clone()); if let VariantKind::Record(ref members) = variant.kind { let variant_name = Fqsn::from_scope_stack(scope_stack.as_ref(), variant.name.clone()); let mut seen_members = HashMap::new(); for (member_name, _) in members.iter() { match seen_members.entry(member_name.as_ref()) { Entry::Occupied(o) => { let location = *o.get(); errors.push(SymbolError::DuplicateRecord { type_name: variant_name.clone(), location, member: member_name.as_ref().to_string(), }); } //TODO eventually this should track meaningful locations Entry::Vacant(v) => { v.insert(location); } } } } } if !errors.is_empty() { return errors; } let mut type_builder = TypeBuilder::new(type_name.name.as_ref()); let mut fqsn_id_map = HashMap::new(); for variant in variants.iter() { let Variant { name, kind, id } = variant; fqsn_id_map.insert(Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()), id); let mut variant_builder = VariantBuilder::new(name.as_ref()); match kind { VariantKind::UnitStruct => (), VariantKind::TupleStruct(items) => for type_identifier in items { let pending: PendingType = type_identifier.into(); variant_builder.add_member(pending); }, VariantKind::Record(members) => for (field_name, type_identifier) in members.iter() { let pending: PendingType = type_identifier.into(); variant_builder.add_record_member(field_name.as_ref(), pending); }, } type_builder.add_variant(variant_builder); } let type_id = self.type_context.register_type(type_builder); let type_definition = self.type_context.lookup_type(&type_id).unwrap(); // This index is guaranteed to be the correct tag for (index, variant) in type_definition.variants.iter().enumerate() { let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string())); let id = fqsn_id_map.get(&fqsn).unwrap(); let tag = index as u32; let spec = match &variant.members { type_inference::VariantMembers::Unit => SymbolSpec::DataConstructor { tag, type_id }, type_inference::VariantMembers::Tuple(..) => SymbolSpec::DataConstructor { tag, type_id }, type_inference::VariantMembers::Record(..) => SymbolSpec::RecordConstructor { tag, type_id }, }; self.table.add_symbol(id, fqsn, spec); } if immediate_variant { let variant = &type_definition.variants[0]; let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string())); let id = fqsn_id_map.get(&fqsn).unwrap(); let abbrev_fqsn = Fqsn::from_scope_stack( scope_stack[0..scope_stack.len() - 1].as_ref(), Rc::new(variant.name.to_string()), ); let spec = SymbolSpec::RecordConstructor { tag: 0, type_id }; self.table.add_symbol(id, abbrev_fqsn, spec); } scope_stack.pop(); vec![] } }