schala/schala-lang/language/src/symbol_table/mod.rs

383 lines
13 KiB
Rust
Raw Normal View History

2021-10-19 20:35:53 -07:00
use std::collections::{HashMap, hash_map::Entry};
use std::rc::Rc;
use std::fmt;
2021-10-19 13:54:32 -07:00
use crate::tokenizing::Location;
2019-01-07 13:00:37 -08:00
use crate::ast;
2021-10-19 14:19:26 -07:00
use crate::ast::{ItemId, TypeBody, Variant, TypeSingletonName, Declaration, Statement, StatementKind, ModuleSpecifier};
2019-01-07 13:00:37 -08:00
use crate::typechecking::TypeName;
2021-10-19 13:48:00 -07:00
mod resolver;
2019-10-17 03:15:39 -07:00
mod symbol_trie;
use symbol_trie::SymbolTrie;
2019-10-16 19:51:43 -07:00
mod test;
/// Fully-qualified symbol name
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
2021-10-19 21:14:15 -07:00
pub struct Fqsn {
//TODO Fqsn's need to be cheaply cloneable
2021-10-19 13:48:00 -07:00
scopes: Vec<Scope>, //TODO rename to ScopeSegment
}
2021-10-19 21:14:15 -07:00
impl Fqsn {
2021-10-19 17:22:35 -07:00
fn from_scope_stack(scopes: &[Scope], new_name: Rc<String>) -> Self {
let mut v = Vec::new();
for s in scopes {
v.push(s.clone());
}
v.push(Scope::Name(new_name));
2021-10-19 21:14:15 -07:00
Fqsn { scopes: v }
}
2021-10-19 17:22:35 -07:00
#[cfg(test)]
2021-10-19 21:14:15 -07:00
fn from_strs(strs: &[&str]) -> Fqsn {
2021-10-19 17:22:35 -07:00
let mut scopes = vec![];
for s in strs {
scopes.push(Scope::Name(Rc::new(s.to_string())));
}
2021-10-19 21:14:15 -07:00
Fqsn {
2021-10-19 17:22:35 -07:00
scopes
}
}
}
2021-10-19 17:22:35 -07:00
//TODO eventually this should use ItemId's to avoid String-cloning
/// One segment within a scope.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
enum Scope {
    /// A segment identified by name.
    Name(Rc<String>),
}
2021-10-19 13:54:32 -07:00
#[allow(dead_code)]
#[derive(Debug, Clone)]
2021-10-19 19:19:21 -07:00
pub enum SymbolError {
DuplicateName {
2021-10-19 21:14:15 -07:00
prev_name: Fqsn,
2021-10-19 19:19:21 -07:00
location: Location
2021-10-19 20:35:53 -07:00
},
DuplicateRecord {
2021-10-19 21:14:15 -07:00
type_name: Fqsn,
2021-10-19 20:35:53 -07:00
location: Location,
member: String,
2021-10-19 19:19:21 -07:00
}
}
2021-10-19 13:54:32 -07:00
/// What a name table stores for one registered name.
#[allow(dead_code)]
#[derive(Debug)]
struct NameSpec<K> {
    /// Source location supplied when the name was registered.
    location: Location,
    /// Table-specific classification of the name.
    kind: K,
}
/// Classification of an entry in the non-type name table.
#[derive(Debug)]
enum NameKind {
    /// The name of a module.
    Module,
    /// The name of a function.
    Function,
    /// The name of a binding.
    Binding,
}
/// Marker kind for entries in the type name table; it carries no data.
#[derive(Debug)]
struct TypeKind;
/// Keeps track of what names were used in a given namespace.
struct NameTable<K> {
2021-10-19 21:14:15 -07:00
table: HashMap<Fqsn, NameSpec<K>>
}
impl<K> NameTable<K> {
fn new() -> Self {
Self { table: HashMap::new() }
}
2021-10-19 21:14:15 -07:00
fn register(&mut self, name: Fqsn, spec: NameSpec<K>) -> Result<(), SymbolError> {
match self.table.entry(name.clone()) {
Entry::Occupied(o) => {
2021-10-19 19:19:21 -07:00
Err(SymbolError::DuplicateName { prev_name: name, location: o.get().location })
},
Entry::Vacant(v) => {
v.insert(spec);
Ok(())
}
}
}
}
//cf. p. 150 or so of Language Implementation Patterns
pub struct SymbolTable {
2021-10-19 14:12:57 -07:00
/// Used for import resolution.
2019-10-17 03:15:39 -07:00
symbol_trie: SymbolTrie,
2021-10-19 13:48:00 -07:00
/// These tables are responsible for preventing duplicate names.
2021-10-18 22:51:36 -07:00
fq_names: NameTable<NameKind>, //Note that presence of two tables implies that a type and other binding with the same name can co-exist
types: NameTable<TypeKind>,
2021-10-19 13:48:00 -07:00
2021-10-19 21:14:15 -07:00
/// A map of the `ItemId`s of instances of use of names to their fully-canonicalized Fqsn form.
2021-10-19 14:12:57 -07:00
/// Updated by the item id resolver.
2021-10-19 21:14:15 -07:00
id_to_fqsn: HashMap<ItemId, Fqsn>,
2021-10-19 13:48:00 -07:00
2021-10-19 21:14:15 -07:00
/// A map of the Fqsn of an AST definition to a Symbol data structure, which contains
2021-10-19 13:48:00 -07:00
/// some basic information about what that symbol is and (ideally) references to other tables
/// (e.g. typechecking tables) with more information about that symbol.
2021-10-19 21:14:15 -07:00
fqsn_to_symbol: HashMap<Fqsn, Symbol>,
}
impl SymbolTable {
pub fn new() -> SymbolTable {
2019-03-07 23:51:31 -08:00
SymbolTable {
symbol_trie: SymbolTrie::new(),
fq_names: NameTable::new(),
types: NameTable::new(),
2021-10-19 13:48:00 -07:00
id_to_fqsn: HashMap::new(),
fqsn_to_symbol: HashMap::new(),
2019-03-07 23:51:31 -08:00
}
}
2018-08-05 18:19:48 -07:00
2021-10-19 14:12:57 -07:00
/// The main entry point into the symbol table. This will traverse the AST in several
/// different ways and populate subtables with information that will be used further in the
/// compilation process.
2021-10-19 19:19:21 -07:00
pub fn process_ast(&mut self, ast: &ast::AST) -> Result<(), Vec<SymbolError>> {
2021-10-19 14:12:57 -07:00
2021-10-19 18:22:34 -07:00
let errs = self.populate_name_tables(ast);
if !errs.is_empty() {
2021-10-19 19:19:21 -07:00
return Err(errs);
2021-10-19 18:22:34 -07:00
}
2021-10-19 19:19:21 -07:00
self.resolve_symbol_ids(ast);
2021-10-19 14:12:57 -07:00
Ok(())
}
2021-10-19 13:48:00 -07:00
pub fn lookup_symbol(&self, id: &ItemId) -> Option<&Symbol> {
let fqsn = self.id_to_fqsn.get(id);
fqsn.and_then(|fqsn| self.fqsn_to_symbol.get(fqsn))
}
}
2021-10-13 00:53:32 -07:00
#[allow(dead_code)]
#[derive(Debug)]
pub struct Symbol {
2021-10-14 06:52:50 -07:00
pub local_name: Rc<String>,
2021-10-19 13:48:00 -07:00
//fully_qualified_name: FullyQualifiedSymbolName,
pub spec: SymbolSpec,
}
2018-06-03 23:04:07 -07:00
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "<Local name: {}, Spec: {}>", self.local_name, self.spec)
2018-06-03 23:04:07 -07:00
}
}
#[derive(Debug)]
pub enum SymbolSpec {
2018-06-03 02:39:49 -07:00
Func(Vec<TypeName>),
2018-05-30 23:54:24 -07:00
DataConstructor {
index: usize,
2021-10-19 16:50:08 -07:00
arity: usize,
2021-10-19 13:48:00 -07:00
type_name: TypeName, //TODO this eventually needs to be some kind of ID
2018-05-30 23:54:24 -07:00
},
2019-01-25 00:57:01 -08:00
RecordConstructor {
index: usize,
members: HashMap<Rc<String>, TypeName>,
type_name: TypeName,
},
2019-10-16 20:22:40 -07:00
Binding,
}
2018-06-03 23:04:07 -07:00
impl fmt::Display for SymbolSpec {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::SymbolSpec::*;
match self {
Func(type_names) => write!(f, "Func({:?})", type_names),
2021-10-19 16:50:08 -07:00
DataConstructor { index, type_name, arity } => write!(f, "DataConstructor(idx: {}, arity: {}, type: {})", index, arity, type_name),
RecordConstructor { type_name, index, ..} => write!(f, "RecordConstructor(idx: {})(<members> -> {})", index, type_name),
Binding => write!(f, "Binding"),
2018-06-03 23:04:07 -07:00
}
}
}
impl SymbolTable {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
* later */
2021-10-19 16:42:48 -07:00
/// Register a new mapping of a fully-qualified symbol name (e.g. `Option::Some`)
/// to a Symbol, a descriptor of what that name refers to.
2021-10-19 21:14:15 -07:00
fn add_symbol(&mut self, fqsn: Fqsn, symbol: Symbol) {
2021-10-19 16:42:48 -07:00
self.symbol_trie.insert(&fqsn);
self.fqsn_to_symbol.insert(fqsn, symbol);
}
2021-10-19 13:48:00 -07:00
/// Walks the AST, matching the ID of an identifier used in some expression to
/// the corresponding Symbol.
2021-10-19 19:19:21 -07:00
fn resolve_symbol_ids(&mut self, ast: &ast::AST) {
2021-10-19 13:48:00 -07:00
let mut resolver = resolver::Resolver::new(self);
2021-10-19 19:19:21 -07:00
resolver.resolve(ast);
2021-10-18 23:41:29 -07:00
}
/// This function traverses the AST and adds symbol table entries for
/// constants, functions, types, and modules defined within. This simultaneously
/// checks for dupicate definitions (and returns errors if discovered), and sets
/// up name tables that will be used by further parts of the compiler
2021-10-19 19:19:21 -07:00
fn populate_name_tables(&mut self, ast: &ast::AST) -> Vec<SymbolError> {
2021-10-19 13:48:00 -07:00
let mut scope_stack = vec![];
self.add_from_scope(ast.statements.as_ref(), &mut scope_stack)
}
2021-10-19 19:19:21 -07:00
fn add_from_scope<'a>(&'a mut self, statements: &[Statement], scope_stack: &mut Vec<Scope>) -> Vec<SymbolError> {
2021-10-19 18:00:34 -07:00
let mut errors = vec![];
for statement in statements {
2021-10-19 17:22:35 -07:00
let Statement { id: _, kind, location } = statement; //TODO I'm not sure if I need to do anything with this ID
let location = *location;
2021-10-19 21:18:57 -07:00
if let Err(err) = self.add_single_statement(kind, location, scope_stack) {
2021-10-19 18:00:34 -07:00
errors.push(err);
2021-10-19 19:19:21 -07:00
} else { // If there's an error with a name, don't recurse into subscopes of that name
let recursive_errs = match kind {
StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => {
let new_scope = Scope::Name(signature.name.clone());
scope_stack.push(new_scope);
let output = self.add_from_scope(body.as_ref(), scope_stack);
scope_stack.pop();
output
}
StatementKind::Module(ModuleSpecifier { name, contents }) => {
let new_scope = Scope::Name(name.clone());
scope_stack.push(new_scope);
let output = self.add_from_scope(contents.as_ref(), scope_stack);
scope_stack.pop();
output
}
StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) => {
self.add_type_members(name, body, mutable, location, scope_stack)
}
_ => vec![]
};
errors.extend(recursive_errs.into_iter());
2021-10-19 18:00:34 -07:00
}
}
2021-10-19 18:22:34 -07:00
errors
2021-10-19 18:00:34 -07:00
}
2021-10-19 21:18:57 -07:00
fn add_single_statement(&mut self, kind: &StatementKind, location: Location, scope_stack: &[Scope]) -> Result<(), SymbolError> {
match kind {
StatementKind::Declaration(Declaration::FuncSig(signature)) => {
2021-10-19 21:18:57 -07:00
let fq_function = Fqsn::from_scope_stack(scope_stack, signature.name.clone());
self.fq_names.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
2021-10-19 13:48:00 -07:00
self.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind } )?;
2021-10-19 14:12:57 -07:00
self.add_symbol(fq_function, Symbol {
2021-10-19 13:48:00 -07:00
local_name: signature.name.clone(),
spec: SymbolSpec::Func(vec![]), //TODO does this inner vec need to exist at all?
});
}
2021-10-19 18:00:34 -07:00
StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => {
2021-10-19 17:22:35 -07:00
let fn_name = &signature.name;
2021-10-19 21:18:57 -07:00
let fq_function = Fqsn::from_scope_stack(scope_stack, fn_name.clone());
self.fq_names.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
2021-10-19 13:48:00 -07:00
self.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind } )?;
2021-10-19 14:12:57 -07:00
self.add_symbol(fq_function, Symbol {
2021-10-19 13:48:00 -07:00
local_name: signature.name.clone(),
spec: SymbolSpec::Func(vec![]), //TODO does this inner vec need to exist at all?
});
},
2021-10-19 18:00:34 -07:00
StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => {
2021-10-19 21:18:57 -07:00
let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone());
self.types.register(fq_type, NameSpec { location, kind: TypeKind } )?;
},
StatementKind::Declaration(Declaration::Binding { name, .. }) => {
2021-10-19 21:18:57 -07:00
let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone());
2021-10-19 13:48:00 -07:00
self.fq_names.register(fq_binding.clone(), NameSpec { location, kind: NameKind::Binding })?;
2021-10-19 14:12:57 -07:00
self.add_symbol(fq_binding, Symbol {
2021-10-19 13:48:00 -07:00
local_name: name.clone(),
spec: SymbolSpec::Binding,
});
}
2021-10-19 18:00:34 -07:00
StatementKind::Module(ModuleSpecifier { name, .. }) => {
2021-10-19 21:18:57 -07:00
let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone());
self.fq_names.register(fq_module, NameSpec { location, kind: NameKind::Module })?;
},
_ => (),
}
2021-10-19 18:00:34 -07:00
Ok(())
}
2021-10-19 19:19:21 -07:00
fn add_type_members(&mut self, type_name: &TypeSingletonName, type_body: &TypeBody, _mutable: &bool, location: Location, scope_stack: &mut Vec<Scope>) -> Vec<SymbolError> {
2021-10-19 20:35:53 -07:00
let mut member_errors = vec![];
let mut errors = vec![];
2021-10-19 21:14:15 -07:00
let mut register = |fqsn: Fqsn, spec: SymbolSpec| {
2021-10-19 13:48:00 -07:00
let name_spec = NameSpec { location, kind: TypeKind };
if let Err(err) = self.types.register(fqsn.clone(), name_spec) {
errors.push(err);
2021-10-19 13:48:00 -07:00
} else {
let local_name = match spec {
SymbolSpec::DataConstructor { ref type_name, ..} | SymbolSpec::RecordConstructor { ref type_name, .. } => type_name.clone(),
_ => panic!("This should never happen"),
};
let symbol = Symbol { local_name, spec };
2021-10-19 14:12:57 -07:00
self.add_symbol(fqsn, symbol);
2021-10-19 13:48:00 -07:00
};
};
let TypeBody(variants) = type_body;
2021-10-19 17:22:35 -07:00
let new_scope = Scope::Name(type_name.name.clone());
scope_stack.push(new_scope);
2021-10-19 13:48:00 -07:00
for (index, variant) in variants.iter().enumerate() {
match variant {
2021-10-19 13:48:00 -07:00
Variant::UnitStruct(name) => {
2021-10-19 21:14:15 -07:00
let fq_name = Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone());
2021-10-19 13:48:00 -07:00
let spec = SymbolSpec::DataConstructor {
index,
2021-10-19 16:50:08 -07:00
arity: 0,
2021-10-19 13:48:00 -07:00
type_name: name.clone(),
};
register(fq_name, spec);
},
Variant::TupleStruct(name, items) => {
2021-10-19 21:14:15 -07:00
let fq_name = Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone());
2021-10-19 13:48:00 -07:00
let spec = SymbolSpec::DataConstructor {
index,
2021-10-19 16:50:08 -07:00
arity: items.len(),
2021-10-19 13:48:00 -07:00
type_name: name.clone(),
};
register(fq_name, spec);
},
Variant::Record { name, members } => {
2021-10-19 21:14:15 -07:00
let fq_name = Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone());
2021-10-19 20:35:53 -07:00
let mut seen_members = HashMap::new();
for (member_name, _) in members.iter() {
match seen_members.entry(member_name.as_ref()) {
Entry::Occupied(o) => {
let location = *o.get();
member_errors.push(SymbolError::DuplicateRecord {
type_name: fq_name.clone(),
location,
member: member_name.as_ref().to_string(),
});
},
//TODO eventually this should track meaningful locations
Entry::Vacant(v) => { v.insert(Location::default()); }
}
}
2021-10-19 13:48:00 -07:00
let spec = SymbolSpec::RecordConstructor {
index,
type_name: name.clone(),
members: members.iter()
.map(|(_, _)| (Rc::new("DUMMY_FIELD".to_string()), Rc::new("DUMMY_TYPE_ID".to_string()))).collect()
};
register(fq_name, spec);
}
}
}
scope_stack.pop();
2021-10-19 20:35:53 -07:00
errors.extend(member_errors.into_iter());
2021-10-19 18:22:34 -07:00
errors
}
2019-01-20 00:22:35 -08:00
}