schala/schala-lang/language/src/symbol_table/mod.rs

565 lines
18 KiB
Rust

use std::collections::{hash_map::Entry, HashMap};
use std::fmt;
use std::rc::Rc;
use crate::ast;
use crate::ast::{
Declaration, ItemId, ModuleSpecifier, Statement, StatementKind, TypeBody, TypeSingletonName,
Variant, VariantKind,
};
use crate::tokenizing::Location;
use crate::typechecking::TypeId;
mod resolver;
mod symbol_trie;
use symbol_trie::SymbolTrie;
mod test;
use crate::identifier::{Id, IdStore, define_id_kind};
// Declares the `DefItem` id kind through the `define_id_kind!` macro imported from
// `crate::identifier` (presumably a marker type -- confirm against the macro definition).
define_id_kind!(DefItem);
/// Identifier attached to every `Symbol`. `SymbolTable::add_symbol` obtains a fresh one
/// from the table's `IdStore<DefItem>` for each symbol it creates.
pub type DefId = Id<DefItem>;
/// Fully-qualified symbol name.
///
/// A sequence of scope segments ordered from the outermost scope inwards; the
/// last segment is the symbol's own (local) name. Names built with
/// `from_scope_stack` always contain at least one segment.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct Fqsn {
    //TODO Fqsn's need to be cheaply cloneable
    scopes: Vec<Scope>, //TODO rename to ScopeSegment
}

impl Fqsn {
    /// Builds the name of `new_name` as declared inside the enclosing `scopes`
    /// (outermost first). The enclosing segments are cloned; `new_name`
    /// becomes the final segment.
    fn from_scope_stack(scopes: &[Scope], new_name: Rc<String>) -> Self {
        // Exactly one extra slot is needed, for the new name itself.
        let mut segments = Vec::with_capacity(scopes.len() + 1);
        segments.extend_from_slice(scopes);
        segments.push(Scope::Name(new_name));
        Fqsn { scopes: segments }
    }

    /// Test helper: builds a name with one segment per string, in order.
    #[cfg(test)]
    fn from_strs(strs: &[&str]) -> Fqsn {
        let scopes = strs
            .iter()
            .map(|s| Scope::Name(Rc::new(s.to_string())))
            .collect();
        Fqsn { scopes }
    }

    /// Returns the final segment, i.e. the unqualified name of the symbol.
    ///
    /// # Panics
    ///
    /// Panics if the name has no segments at all.
    fn local_name(&self) -> Rc<String> {
        let Scope::Name(name) = self
            .scopes
            .last()
            .expect("an Fqsn must contain at least one segment");
        Rc::clone(name)
    }
}

impl fmt::Display for Fqsn {
    /// Renders the name as `FQSN<a::b::c>`. A name without any segments is
    /// rendered as `FQSN<>` rather than panicking.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let delim = "::";
        write!(f, "FQSN<")?;
        for (index, segment) in self.scopes.iter().enumerate() {
            // The delimiter goes between segments, never before the first one.
            if index > 0 {
                write!(f, "{}", delim)?;
            }
            write!(f, "{}", segment)?;
        }
        write!(f, ">")
    }
}

//TODO eventually this should use ItemId's to avoid String-cloning
/// One segment within a scope.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
enum Scope {
    Name(Rc<String>),
}

impl fmt::Display for Scope {
    /// Writes the bare segment name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Scope::Name(name) => write!(f, "{}", name),
        }
    }
}
/// Errors reported while the name tables are being populated.
// NOTE(review): the `dead_code` allowance presumably exists because the fields are
// only read through `Debug` -- confirm before removing it.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub enum SymbolError {
/// Returned by `NameTable::register` when the name is already present in the table.
DuplicateName {
/// The fully-qualified name whose registration was rejected.
prev_name: Fqsn,
/// Location stored with the entry that was already in the table.
location: Location,
},
/// Pushed by `add_type_members` when a record variant lists the same member name
/// more than once.
DuplicateRecord {
/// Fully-qualified name built for the record variant that contains the repeat.
type_name: Fqsn,
/// Location stored for the first occurrence; `add_type_members` currently stores
/// `Location::default()` there, so this is a placeholder.
location: Location,
/// The member name that was repeated.
member: String,
},
}
/// What a `NameTable` stores for each registered name.
///
/// `K` is the per-table kind tag: `NameKind` for the table of modules, functions and
/// bindings, `TypeKind` for the table of types.
#[allow(dead_code)]
#[derive(Debug)]
struct NameSpec<K> {
/// Location passed in when the name was registered; reported back in
/// `SymbolError::DuplicateName` if the same name is registered again.
location: Location,
/// Kind tag supplied by the registering code.
kind: K,
}
/// Kind tag for entries of the `fq_names` table, set by `add_single_statement`.
#[derive(Debug)]
enum NameKind {
/// Registered for a `StatementKind::Module`.
Module,
/// Registered for a `Declaration::FuncSig` or `Declaration::FuncDecl`.
Function,
/// Registered for a `Declaration::Binding`.
Binding,
}
/// Kind tag for entries of the `types` table. It carries no data; every entry of
/// that table uses this same unit value.
#[derive(Debug)]
struct TypeKind;
/// Keeps track of what names were used in a given namespace.
///
/// Each fully-qualified name can be registered once; `register` rejects a second
/// registration of the same name with `SymbolError::DuplicateName`.
struct NameTable<K> {
/// Registered names together with the location/kind recorded for them.
table: HashMap<Fqsn, NameSpec<K>>,
}
impl<K> NameTable<K> {
fn new() -> Self {
Self {
table: HashMap::new(),
}
}
fn register(&mut self, name: Fqsn, spec: NameSpec<K>) -> Result<(), SymbolError> {
match self.table.entry(name.clone()) {
Entry::Occupied(o) => Err(SymbolError::DuplicateName {
prev_name: name,
location: o.get().location,
}),
Entry::Vacant(v) => {
v.insert(spec);
Ok(())
}
}
}
}
//cf. p. 150 or so of Language Implementation Patterns
/// Symbol information gathered from an AST by `process_ast`.
pub struct SymbolTable {
/// Source of the fresh `DefId` given to every symbol created by `add_symbol`.
def_id_store: IdStore<DefItem>,
/// Used for import resolution.
symbol_trie: SymbolTrie,
/// These tables are responsible for preventing duplicate names.
fq_names: NameTable<NameKind>, //Note that presence of two tables implies that a type and other binding with the same name can co-exist
types: NameTable<TypeKind>,
/// A map of the Fqsn of an AST definition to a Symbol data structure, which contains
/// some basic information about what that symbol is and (ideally) references to other tables
/// (e.g. typechecking tables) with more information about that symbol.
fqsn_to_symbol: HashMap<Fqsn, Rc<Symbol>>,
/// Symbols keyed by the `ItemId` handed to `add_symbol`; this is the map read by
/// `lookup_symbol` and `lookup_symbol_by_def`.
id_to_symbol: HashMap<ItemId, Rc<Symbol>>,
}
impl SymbolTable {
    /// Creates a symbol table with every sub-table empty.
    pub fn new() -> SymbolTable {
        let def_id_store = IdStore::new();
        let symbol_trie = SymbolTrie::new();
        SymbolTable {
            def_id_store,
            symbol_trie,
            fq_names: NameTable::new(),
            types: NameTable::new(),
            fqsn_to_symbol: HashMap::new(),
            id_to_symbol: HashMap::new(),
        }
    }

    /// The main entry point into the symbol table. This will traverse the AST in several
    /// different ways and populate subtables with information that will be used further in the
    /// compilation process.
    ///
    /// # Errors
    ///
    /// Returns every error collected while populating the name tables; in that
    /// case scope resolution is not attempted.
    pub fn process_ast(&mut self, ast: &ast::AST) -> Result<(), Vec<SymbolError>> {
        let errs = self.populate_name_tables(ast);
        if errs.is_empty() {
            self.resolve_scopes(ast);
            Ok(())
        } else {
            Err(errs)
        }
    }

    /// Returns the symbol stored under `id`, if there is one.
    pub fn lookup_symbol(&self, id: &ItemId) -> Option<&Symbol> {
        self.id_to_symbol.get(id).map(|symbol| &**symbol)
    }

    //TODO optimize this
    /// Returns a symbol whose `DefId` equals `def`, found by scanning every
    /// stored symbol.
    pub fn lookup_symbol_by_def(&self, def: &DefId) -> Option<&Symbol> {
        self.id_to_symbol
            .values()
            .find(|symbol| symbol.def_id == *def)
            .map(|symbol| &**symbol)
    }

    /// Prints every `ItemId => Symbol` pair to stdout.
    #[allow(dead_code)]
    pub fn debug(&self) {
        println!("Symbol table:");
        println!("----------------");
        for (id, sym) in &self.id_to_symbol {
            println!("{} => {}", id, sym);
        }
    }
}
/// Descriptor of what a fully-qualified name refers to. Instances are created by
/// `SymbolTable::add_symbol` and shared between its maps behind an `Rc`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct Symbol {
/// Full name of the symbol; its last segment is what `local_name` returns.
fully_qualified_name: Fqsn,
/// What sort of thing the symbol is, plus kind-specific data.
spec: SymbolSpec,
/// Id issued by the symbol table's `IdStore` when the symbol was added.
def_id: DefId,
}
impl Symbol {
pub fn local_name(&self) -> Rc<String> {
self.fully_qualified_name.local_name()
}
pub fn def_id(&self) -> Option<DefId> {
Some(self.def_id.clone())
}
pub fn spec(&self) -> SymbolSpec {
self.spec.clone()
}
}
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "<Local name: {}, {}, Spec: {}>", self.local_name(), self.fully_qualified_name, self.spec)
}
}
//TODO - I think I eventually want to draw a distinction between true global items
//i.e. global vars, and items whose definitions are scoped. Right now there's a sense
//in which Func, DataConstructor, RecordConstructor, and GlobalBinding are "globals",
//whereas LocalVariable and FunctionParam have local scope. But right now, they all
//get put into a common table, and all get DefId's from a common source.
//
//It would be good if individual functions could in parallel look up their own
//local vars without interfering with other lookups. Also some type definitions
//should be scoped in a similar way.
//
//Also it makes sense that non-globals should not use DefId's, particularly not
//function parameters (even though they are currently assigned).
/// The kind of thing a `Symbol` refers to, with kind-specific data.
#[derive(Debug, Clone)]
pub enum SymbolSpec {
/// A function; added for both `Declaration::FuncSig` and `Declaration::FuncDecl`.
Func,
/// Constructor of a unit or tuple variant of a type declaration.
DataConstructor {
/// Index of the variant within its type body.
tag: u32,
/// Number of tuple items; 0 for a unit variant.
arity: usize,
/// Result of `TypeId::lookup_name` on the variant's name.
type_id: TypeId,
},
/// Constructor of a record variant of a type declaration.
RecordConstructor {
/// Index of the variant within its type body.
tag: u32,
/// Member name to member type. `add_type_members` currently fills every value
/// with the placeholder `TypeId::lookup_name("DUMMY_TYPE_ID")`.
members: HashMap<Rc<String>, TypeId>,
/// Result of `TypeId::lookup_name` on the variant's name.
type_id: TypeId,
},
GlobalBinding, //Only for global variables, not for function-local ones or ones within a `let` scope context
/// Not constructed in this file; presumably produced by the `resolver` submodule -- confirm.
LocalVariable,
/// Not constructed in this file; the `u8` is presumably the parameter's position -- confirm.
FunctionParam(u8),
}
impl fmt::Display for SymbolSpec {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::SymbolSpec::*;
match self {
Func => write!(f, "Func"),
DataConstructor {
tag,
type_id,
arity,
} => write!(
f,
"DataConstructor(tag: {}, arity: {}, type: {})",
tag, arity, type_id
),
RecordConstructor {
type_id, tag, ..
} => write!(
f,
"RecordConstructor(tag: {})(<members> -> {})",
tag, type_id
),
GlobalBinding => write!(f, "GlobalBinding"),
LocalVariable => write!(f, "Local variable"),
FunctionParam(n) => write!(f, "Function param: {}", n),
}
}
}
impl SymbolTable {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
 * later */
/// Register a new mapping of a fully-qualified symbol name (e.g. `Option::Some`)
/// to a Symbol, a descriptor of what that name refers to.
///
/// A fresh `DefId` is drawn for the symbol, the name is inserted into the
/// symbol trie, and the symbol is stored under both `fqsn` and `id`. An
/// existing map entry for the same key is replaced.
///
/// This is called once per declaration, so it deliberately prints nothing;
/// use `SymbolTable::debug` to inspect the table contents.
fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) {
    let def_id = self.def_id_store.fresh();
    let symbol = Rc::new(Symbol {
        fully_qualified_name: fqsn.clone(),
        spec,
        def_id,
    });
    self.symbol_trie.insert(&fqsn);
    // Both maps share the same allocation through the `Rc`.
    self.fqsn_to_symbol.insert(fqsn, Rc::clone(&symbol));
    self.id_to_symbol.insert(id.clone(), symbol);
}
/// Walks the AST, matching the ID of an identifier used in some expression to
/// the corresponding Symbol. The walk itself is carried out by a
/// `resolver::ScopeResolver` constructed over this table.
fn resolve_scopes(&mut self, ast: &ast::AST) {
    resolver::ScopeResolver::new(self).resolve(ast);
}
/// This function traverses the AST and adds symbol table entries for
/// constants, functions, types, and modules defined within. This simultaneously
/// checks for duplicate definitions (and returns errors if discovered), and sets
/// up name tables that will be used by further parts of the compiler.
///
/// The traversal starts at the top level: empty scope stack, not inside a function.
fn populate_name_tables(&mut self, ast: &ast::AST) -> Vec<SymbolError> {
    let mut top_level_scopes = Vec::new();
    let statements = ast.statements.as_ref();
    self.add_from_scope(statements, &mut top_level_scopes, false)
}
/// Registers every statement of one scope and recurses into the scopes nested
/// inside it (function bodies, module contents, type members).
///
/// `scope_stack` holds the enclosing scope segments; it is pushed to and popped
/// from around each recursion, so it is unchanged when the call returns.
/// `function_scope` says whether `statements` are the body of a function.
/// Returns every error collected in this scope and all nested ones.
fn add_from_scope<'a>(
    &'a mut self,
    statements: &[Statement],
    scope_stack: &mut Vec<Scope>,
    function_scope: bool,
) -> Vec<SymbolError> {
    let mut errors = Vec::new();
    //TODO I'm not sure if I need to do anything with this ID
    for Statement { id, kind, location } in statements {
        let location = *location;

        // If there's an error with a name, don't recurse into subscopes of that name
        if let Err(err) = self.add_single_statement(id, kind, location, scope_stack, function_scope) {
            errors.push(err);
            continue;
        }

        match kind {
            StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => {
                scope_stack.push(Scope::Name(signature.name.clone()));
                let nested = self.add_from_scope(body.as_ref(), scope_stack, true);
                scope_stack.pop();
                errors.extend(nested);
            }
            StatementKind::Module(ModuleSpecifier { name, contents }) => {
                scope_stack.push(Scope::Name(name.clone()));
                let nested = self.add_from_scope(contents.as_ref(), scope_stack, false);
                scope_stack.pop();
                errors.extend(nested);
            }
            StatementKind::Declaration(Declaration::TypeDecl {
                name,
                body,
                mutable,
            }) => {
                let nested = self.add_type_members(name, body, mutable, location, scope_stack);
                errors.extend(nested);
            }
            _ => {}
        }
    }
    errors
}
/// Registers the name introduced by a single statement, without descending
/// into any scope the statement opens.
///
/// * Function signatures and declarations are registered in both name tables
///   and get a `SymbolSpec::Func` symbol.
/// * Type declarations are registered in the `types` table only.
/// * Bindings are registered in `fq_names`; a `SymbolSpec::GlobalBinding`
///   symbol is added only when `function_scope` is false.
/// * Modules are registered in `fq_names` only.
/// * Every other statement kind is ignored.
///
/// # Errors
///
/// Returns `SymbolError::DuplicateName` if the name is already present in a
/// table it has to be registered in.
fn add_single_statement(
    &mut self,
    id: &ItemId,
    kind: &StatementKind,
    location: Location,
    scope_stack: &[Scope],
    function_scope: bool,
) -> Result<(), SymbolError> {
    match kind {
        StatementKind::Declaration(Declaration::FuncSig(signature)) => {
            self.register_function(id, signature.name.clone(), location, scope_stack)?;
        }
        StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => {
            self.register_function(id, signature.name.clone(), location, scope_stack)?;
        }
        StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => {
            let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone());
            self.types.register(
                fq_type,
                NameSpec {
                    location,
                    kind: TypeKind,
                },
            )?;
        }
        StatementKind::Declaration(Declaration::Binding { name, .. }) => {
            let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone());
            self.fq_names.register(
                fq_binding.clone(),
                NameSpec {
                    location,
                    kind: NameKind::Binding,
                },
            )?;
            // Bindings inside a function body are name-checked but get no symbol here.
            if !function_scope {
                self.add_symbol(id, fq_binding, SymbolSpec::GlobalBinding);
            }
        }
        StatementKind::Module(ModuleSpecifier { name, .. }) => {
            let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone());
            self.fq_names.register(
                fq_module,
                NameSpec {
                    location,
                    kind: NameKind::Module,
                },
            )?;
        }
        _ => (),
    }
    Ok(())
}

/// Shared handling for function signatures and function declarations: registers
/// the function's name in `fq_names`, then in `types`, then adds a
/// `SymbolSpec::Func` symbol for it. Stops at the first failed registration.
fn register_function(
    &mut self,
    id: &ItemId,
    name: Rc<String>,
    location: Location,
    scope_stack: &[Scope],
) -> Result<(), SymbolError> {
    let fq_function = Fqsn::from_scope_stack(scope_stack, name);
    self.fq_names.register(
        fq_function.clone(),
        NameSpec {
            location,
            kind: NameKind::Function,
        },
    )?;
    self.types.register(
        fq_function.clone(),
        NameSpec {
            location,
            kind: TypeKind,
        },
    )?;
    self.add_symbol(id, fq_function, SymbolSpec::Func);
    Ok(())
}
/// Registers a constructor symbol for every variant of a type declaration.
///
/// The type's name is pushed onto `scope_stack` for the duration of the call,
/// so each variant's fully-qualified name lives under the type. Each variant is
/// registered in the `types` table and, if that succeeds, added as a symbol:
/// unit and tuple variants as `DataConstructor`, record variants as
/// `RecordConstructor`. A variant's tag is its index within the type body.
///
/// Returns the registration errors followed by one `DuplicateRecord` error per
/// repeated record member name. `_mutable` is currently unused.
fn add_type_members(
    &mut self,
    type_name: &TypeSingletonName,
    type_body: &TypeBody,
    _mutable: &bool,
    location: Location,
    scope_stack: &mut Vec<Scope>,
) -> Vec<SymbolError> {
    let mut registration_errors = Vec::new();
    let mut duplicate_member_errors = Vec::new();

    let TypeBody(variants) = type_body;
    scope_stack.push(Scope::Name(type_name.name.clone()));

    for (index, Variant { name, kind, id }) in variants.iter().enumerate() {
        let tag = index as u32;
        let type_id = TypeId::lookup_name(name.as_ref());
        let fq_name = Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone());

        let spec = match kind {
            VariantKind::UnitStruct => SymbolSpec::DataConstructor {
                tag,
                arity: 0,
                type_id,
            },
            VariantKind::TupleStruct(items) => SymbolSpec::DataConstructor {
                tag,
                arity: items.len(),
                type_id,
            },
            VariantKind::Record(members) => {
                let mut seen_members = HashMap::new();
                for (member_name, _) in members.iter() {
                    match seen_members.entry(member_name.as_ref()) {
                        Entry::Occupied(first_seen) => {
                            duplicate_member_errors.push(SymbolError::DuplicateRecord {
                                type_name: fq_name.clone(),
                                location: *first_seen.get(),
                                member: member_name.as_ref().to_string(),
                            });
                        }
                        //TODO eventually this should track meaningful locations
                        Entry::Vacant(slot) => {
                            slot.insert(Location::default());
                        }
                    }
                }
                // Member types are not resolved yet; every member gets the same placeholder.
                let member_types = members
                    .iter()
                    .map(|(member_name, _type_identifier)| {
                        (member_name.clone(), TypeId::lookup_name("DUMMY_TYPE_ID"))
                    })
                    .collect();
                SymbolSpec::RecordConstructor {
                    tag,
                    type_id,
                    members: member_types,
                }
            }
        };

        // The symbol is only added when the name was not already taken.
        let name_spec = NameSpec {
            location,
            kind: TypeKind,
        };
        match self.types.register(fq_name.clone(), name_spec) {
            Ok(()) => self.add_symbol(id, fq_name, spec),
            Err(err) => registration_errors.push(err),
        }
    }

    scope_stack.pop();
    registration_errors.extend(duplicate_member_errors);
    registration_errors
}
}