schala/schala-lang/language/src/symbol_table/mod.rs

539 lines
19 KiB
Rust
Raw Normal View History

2021-10-29 17:27:21 -07:00
#![allow(clippy::enum_variant_names)]
use std::{
collections::{hash_map::Entry, HashMap, HashSet},
fmt,
rc::Rc,
};
use crate::{
ast,
ast::{
Declaration, ItemId, ModuleSpecifier, Statement, StatementKind, TypeBody, TypeSingletonName, Variant,
VariantKind,
},
builtin::Builtin,
tokenizing::Location,
2021-10-29 17:27:21 -07:00
type_inference::{self, PendingType, TypeBuilder, TypeContext, TypeId, VariantBuilder},
2021-10-21 14:46:42 -07:00
};
2021-10-19 13:48:00 -07:00
mod resolver;
2019-10-17 03:15:39 -07:00
mod symbol_trie;
use symbol_trie::SymbolTrie;
2019-10-16 19:51:43 -07:00
mod test;
use crate::identifier::{define_id_kind, Id, IdStore};
2019-10-16 19:51:43 -07:00
// Declares `DefItem` through the `define_id_kind!` macro imported from `crate::identifier`;
// it is used below as the kind parameter of `Id` and of the table's `IdStore`.
define_id_kind!(DefItem);
/// Identifier of a definition: an [`Id`] tagged with the `DefItem` kind.
pub type DefId = Id<DefItem>;
/// Fully-qualified symbol name
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
2021-10-19 21:14:15 -07:00
pub struct Fqsn {
2021-10-21 14:46:42 -07:00
//TODO Fqsn's need to be cheaply cloneable
2021-11-02 01:20:30 -07:00
scopes: Vec<ScopeSegment>,
}
2021-10-19 21:14:15 -07:00
impl Fqsn {
2021-11-02 01:20:30 -07:00
fn from_scope_stack(scopes: &[ScopeSegment], new_name: Rc<String>) -> Self {
2021-10-21 14:46:42 -07:00
let mut v = Vec::new();
for s in scopes {
v.push(s.clone());
}
2021-11-02 01:20:30 -07:00
v.push(ScopeSegment::Name(new_name));
2021-10-21 14:46:42 -07:00
Fqsn { scopes: v }
2021-10-19 17:22:35 -07:00
}
2021-10-21 14:46:42 -07:00
fn from_strs(strs: &[&str]) -> Fqsn {
let mut scopes = vec![];
for s in strs {
2021-11-02 01:20:30 -07:00
scopes.push(ScopeSegment::Name(Rc::new(s.to_string())));
2021-10-21 14:46:42 -07:00
}
Fqsn { scopes }
2021-10-19 17:22:35 -07:00
}
2021-10-21 21:55:21 -07:00
fn local_name(&self) -> Rc<String> {
2021-11-02 01:20:30 -07:00
let ScopeSegment::Name(name) = self.scopes.last().unwrap();
2021-10-21 21:55:21 -07:00
name.clone()
}
}
2021-10-25 12:47:35 -07:00
impl fmt::Display for Fqsn {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let delim = "::";
let Fqsn { scopes } = self;
write!(f, "FQSN<{}", scopes[0])?;
for item in scopes[1..].iter() {
write!(f, "{}{}", delim, item)?;
}
write!(f, ">")
}
}
//TODO eventually this should use ItemId's to avoid String-cloning
/// One segment within a scope.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
enum ScopeSegment {
    /// A segment identified by its name; the name is shared through an `Rc`.
    Name(Rc<String>),
}
2021-11-02 01:20:30 -07:00
impl fmt::Display for ScopeSegment {
2021-10-25 12:47:35 -07:00
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2021-11-02 01:20:30 -07:00
let ScopeSegment::Name(name) = self;
2021-10-25 12:47:35 -07:00
write!(f, "{}", name)
}
}
2021-10-19 13:54:32 -07:00
#[allow(dead_code)]
#[derive(Debug, Clone)]
2021-10-19 19:19:21 -07:00
pub enum SymbolError {
DuplicateName { prev_name: Fqsn, location: Location },
DuplicateVariant { type_fqsn: Fqsn, name: String },
DuplicateRecord { type_name: Fqsn, location: Location, member: String },
}
2021-10-19 13:54:32 -07:00
#[allow(dead_code)]
#[derive(Debug)]
struct NameSpec<K> {
2021-10-21 14:46:42 -07:00
location: Location,
kind: K,
}
/// Classification of entries in the non-type name table.
#[derive(Debug)]
enum NameKind {
    /// The name of a module.
    Module,
    /// The name of a function (signature or full declaration).
    Function,
    /// The name of a binding declaration.
    Binding,
}
/// Kind marker used for entries of the type name table (`NameTable<TypeKind>`);
/// it carries no data of its own.
#[derive(Debug)]
struct TypeKind;
/// Keeps track of what names were used in a given namespace.
struct NameTable<K> {
2021-10-21 14:46:42 -07:00
table: HashMap<Fqsn, NameSpec<K>>,
}
impl<K> NameTable<K> {
2021-10-21 14:46:42 -07:00
fn new() -> Self {
Self { table: HashMap::new() }
2021-10-21 14:46:42 -07:00
}
fn register(&mut self, name: Fqsn, spec: NameSpec<K>) -> Result<(), SymbolError> {
match self.table.entry(name.clone()) {
Entry::Occupied(o) =>
Err(SymbolError::DuplicateName { prev_name: name, location: o.get().location }),
2021-10-21 14:46:42 -07:00
Entry::Vacant(v) => {
v.insert(spec);
Ok(())
}
}
}
}
//cf. p. 150 or so of Language Implementation Patterns
pub struct SymbolTable {
def_id_store: IdStore<DefItem>,
2021-10-21 14:46:42 -07:00
/// Used for import resolution.
symbol_trie: SymbolTrie,
2021-10-19 13:48:00 -07:00
2021-10-21 14:46:42 -07:00
/// These tables are responsible for preventing duplicate names.
fq_names: NameTable<NameKind>, //Note that presence of two tables implies that a type and other binding with the same name can co-exist
types: NameTable<TypeKind>,
2021-10-19 13:48:00 -07:00
id_to_def: HashMap<ItemId, DefId>,
def_to_symbol: HashMap<DefId, Rc<Symbol>>,
}
impl SymbolTable {
2021-10-21 14:46:42 -07:00
pub fn new() -> SymbolTable {
let mut table = SymbolTable {
def_id_store: IdStore::new(),
2021-10-21 14:46:42 -07:00
symbol_trie: SymbolTrie::new(),
fq_names: NameTable::new(),
types: NameTable::new(),
2021-10-21 21:55:21 -07:00
id_to_def: HashMap::new(),
def_to_symbol: HashMap::new(),
};
table.populate_builtins();
table
2019-03-07 23:51:31 -08:00
}
2018-08-05 18:19:48 -07:00
2021-10-21 14:46:42 -07:00
/// The main entry point into the symbol table. This will traverse the AST in several
/// different ways and populate subtables with information that will be used further in the
/// compilation process.
pub fn process_ast(
&mut self,
ast: &ast::AST,
type_context: &mut TypeContext,
) -> Result<(), Vec<SymbolError>> {
2021-10-27 15:39:09 -07:00
let mut runner = SymbolTableRunner { type_context, table: self };
let errs = runner.populate_name_tables(ast);
2021-10-21 14:46:42 -07:00
if !errs.is_empty() {
return Err(errs);
}
2021-10-27 15:39:09 -07:00
runner.resolve_scopes(ast);
2021-10-21 14:46:42 -07:00
Ok(())
}
2021-10-19 14:12:57 -07:00
2021-10-21 14:46:42 -07:00
pub fn lookup_symbol(&self, id: &ItemId) -> Option<&Symbol> {
let def = self.id_to_def.get(id)?;
self.def_to_symbol.get(def).map(|s| s.as_ref())
2021-10-19 18:22:34 -07:00
}
pub fn lookup_symbol_by_def(&self, def: &DefId) -> Option<&Symbol> {
self.def_to_symbol.get(def).map(|s| s.as_ref())
}
2021-10-26 11:37:43 -07:00
#[allow(dead_code)]
pub fn debug(&self) {
println!("Symbol table:");
println!("----------------");
for (id, def) in self.id_to_def.iter() {
if let Some(symbol) = self.def_to_symbol.get(def) {
println!("{} => {}: {}", id, def, symbol);
} else {
println!("{} => {} <NO SYMBOL FOUND>", id, def);
}
2021-10-26 11:37:43 -07:00
}
}
2021-10-27 15:39:09 -07:00
/// Register a new mapping of a fully-qualified symbol name (e.g. `Option::Some`)
/// to a Symbol, a descriptor of what that name refers to.
fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) {
let def_id = self.def_id_store.fresh();
let symbol = Rc::new(Symbol { fully_qualified_name: fqsn.clone(), spec, def_id });
self.symbol_trie.insert(&fqsn, def_id);
self.id_to_def.insert(*id, def_id);
self.def_to_symbol.insert(def_id, symbol);
2021-10-27 15:39:09 -07:00
}
fn populate_single_builtin(&mut self, fqsn: Fqsn, builtin: Builtin) {
let def_id = self.def_id_store.fresh();
let spec = SymbolSpec::Builtin(builtin);
let symbol = Rc::new(Symbol { fully_qualified_name: fqsn.clone(), spec, def_id });
self.symbol_trie.insert(&fqsn, def_id);
self.def_to_symbol.insert(def_id, symbol);
}
fn populate_builtins(&mut self) {
let fqsn = Fqsn::from_strs(&["println"]);
self.populate_single_builtin(fqsn, Builtin::IOPrintLn);
let fqsn = Fqsn::from_strs(&["print"]);
self.populate_single_builtin(fqsn, Builtin::IOPrint);
}
2021-10-27 15:39:09 -07:00
}
/// Bundles the symbol table being populated with the type context used while
/// processing an AST; constructed by `SymbolTable::process_ast`.
struct SymbolTableRunner<'a> {
// Type registry that type declarations are registered with.
type_context: &'a mut TypeContext,
// The symbol table that names and symbols are added to.
table: &'a mut SymbolTable,
}
2021-10-13 00:53:32 -07:00
#[allow(dead_code)]
2021-10-21 21:55:21 -07:00
#[derive(Debug, Clone)]
pub struct Symbol {
2021-10-21 21:55:21 -07:00
fully_qualified_name: Fqsn,
2021-10-25 13:03:31 -07:00
spec: SymbolSpec,
def_id: DefId,
}
impl Symbol {
    /// The unqualified name of the symbol, i.e. the last segment of its
    /// fully-qualified name.
    pub fn local_name(&self) -> Rc<String> {
        self.fully_qualified_name.local_name()
    }

    /// The definition id of the symbol. Every symbol currently has one, so this
    /// always returns `Some`.
    pub fn def_id(&self) -> Option<DefId> {
        Some(self.def_id)
    }

    /// A copy of the symbol's spec.
    pub fn spec(&self) -> SymbolSpec {
        self.spec.clone()
    }
}
2018-06-03 23:04:07 -07:00
impl fmt::Display for Symbol {
2021-10-21 14:46:42 -07:00
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2021-10-26 11:37:43 -07:00
write!(f, "<Local name: {}, {}, Spec: {}>", self.local_name(), self.fully_qualified_name, self.spec)
2021-10-21 14:46:42 -07:00
}
2018-06-03 23:04:07 -07:00
}
//TODO - I think I eventually want to draw a distinction between true global items
//i.e. global vars, and items whose definitions are scoped. Right now there's a sense
//in which Func, DataConstructor, RecordConstructor, and GlobalBinding are "globals",
//whereas LocalVarible and FunctionParam have local scope. But right now, they all
//get put into a common table, and all get DefId's from a common source.
//
//It would be good if individual functions could in parallel look up their own
//local vars without interfering with other lookups. Also some type definitions
//should be scoped in a similar way.
2021-10-24 02:54:21 -07:00
//
//Also it makes sense that non-globals should not use DefId's, particularly not
//function parameters (even though they are currently assigned).
2021-10-21 21:55:21 -07:00
#[derive(Debug, Clone)]
pub enum SymbolSpec {
Builtin(Builtin),
Func,
DataConstructor { tag: u32, type_id: TypeId },
2021-11-01 13:46:38 -07:00
RecordConstructor { tag: u32, type_id: TypeId },
2021-10-24 00:08:26 -07:00
GlobalBinding, //Only for global variables, not for function-local ones or ones within a `let` scope context
LocalVariable,
FunctionParam(u8),
}
2018-06-03 23:04:07 -07:00
impl fmt::Display for SymbolSpec {
2021-10-21 14:46:42 -07:00
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::SymbolSpec::*;
match self {
Builtin(b) => write!(f, "Builtin: {:?}", b),
Func => write!(f, "Func"),
DataConstructor { tag, type_id } => write!(f, "DataConstructor(tag: {}, type: {})", tag, type_id),
RecordConstructor { type_id, tag, .. } =>
write!(f, "RecordConstructor(tag: {})(<members> -> {})", tag, type_id),
GlobalBinding => write!(f, "GlobalBinding"),
LocalVariable => write!(f, "Local variable"),
FunctionParam(n) => write!(f, "Function param: {}", n),
2021-10-21 14:46:42 -07:00
}
2018-06-03 23:04:07 -07:00
}
}
2021-10-27 15:39:09 -07:00
impl<'a> SymbolTableRunner<'a> {
2021-10-21 14:46:42 -07:00
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
* later */
2021-10-21 21:55:21 -07:00
fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) {
2021-10-27 15:39:09 -07:00
self.table.add_symbol(id, fqsn, spec)
2021-10-19 18:00:34 -07:00
}
2021-10-21 14:46:42 -07:00
/// Walks the AST, matching the ID of an identifier used in some expression to
/// the corresponding Symbol.
fn resolve_scopes(&mut self, ast: &ast::AST) {
2021-10-27 15:39:09 -07:00
let mut resolver = resolver::ScopeResolver::new(self.table);
2021-10-21 14:46:42 -07:00
resolver.resolve(ast);
}
2021-10-19 18:00:34 -07:00
2021-10-21 14:46:42 -07:00
/// This function traverses the AST and adds symbol table entries for
/// constants, functions, types, and modules defined within. This simultaneously
/// checks for dupicate definitions (and returns errors if discovered), and sets
/// up name tables that will be used by further parts of the compiler
fn populate_name_tables(&mut self, ast: &ast::AST) -> Vec<SymbolError> {
let mut scope_stack = vec![];
2021-10-24 00:08:26 -07:00
self.add_from_scope(ast.statements.as_ref(), &mut scope_stack, false)
2021-10-21 14:46:42 -07:00
}
2021-10-19 13:48:00 -07:00
2021-10-27 15:39:09 -07:00
fn add_from_scope(
&mut self,
2021-10-21 14:46:42 -07:00
statements: &[Statement],
2021-11-02 01:20:30 -07:00
scope_stack: &mut Vec<ScopeSegment>,
2021-10-24 00:08:26 -07:00
function_scope: bool,
2021-10-21 14:46:42 -07:00
) -> Vec<SymbolError> {
let mut errors = vec![];
for statement in statements {
2021-11-01 21:34:45 -07:00
let Statement { id, kind, location } = statement;
2021-10-21 14:46:42 -07:00
let location = *location;
2021-10-24 00:08:26 -07:00
if let Err(err) = self.add_single_statement(id, kind, location, scope_stack, function_scope) {
2021-10-21 14:46:42 -07:00
errors.push(err);
} else {
// If there's an error with a name, don't recurse into subscopes of that name
let recursive_errs = match kind {
StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => {
2021-11-02 01:20:30 -07:00
let new_scope = ScopeSegment::Name(signature.name.clone());
2021-10-21 14:46:42 -07:00
scope_stack.push(new_scope);
2021-10-24 00:08:26 -07:00
let output = self.add_from_scope(body.as_ref(), scope_stack, true);
2021-10-21 14:46:42 -07:00
scope_stack.pop();
output
}
StatementKind::Module(ModuleSpecifier { name, contents }) => {
2021-11-02 01:20:30 -07:00
let new_scope = ScopeSegment::Name(name.clone());
2021-10-21 14:46:42 -07:00
scope_stack.push(new_scope);
2021-10-24 00:08:26 -07:00
let output = self.add_from_scope(contents.as_ref(), scope_stack, false);
2021-10-21 14:46:42 -07:00
scope_stack.pop();
output
}
StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) =>
self.add_type_members(name, body, mutable, location, scope_stack),
2021-10-21 14:46:42 -07:00
_ => vec![],
};
errors.extend(recursive_errs.into_iter());
}
}
2021-10-21 14:46:42 -07:00
errors
}
fn add_single_statement(
&mut self,
2021-10-21 21:55:21 -07:00
id: &ItemId,
2021-10-21 14:46:42 -07:00
kind: &StatementKind,
location: Location,
2021-11-02 01:20:30 -07:00
scope_stack: &[ScopeSegment],
2021-10-24 00:08:26 -07:00
function_scope: bool,
2021-10-21 14:46:42 -07:00
) -> Result<(), SymbolError> {
match kind {
StatementKind::Declaration(Declaration::FuncSig(signature)) => {
let fq_function = Fqsn::from_scope_stack(scope_stack, signature.name.clone());
self.table
.fq_names
.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?;
2021-10-21 14:46:42 -07:00
self.add_symbol(id, fq_function, SymbolSpec::Func);
2021-10-21 14:46:42 -07:00
}
StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => {
let fn_name = &signature.name;
let fq_function = Fqsn::from_scope_stack(scope_stack, fn_name.clone());
self.table
.fq_names
.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?;
2021-10-21 14:46:42 -07:00
self.add_symbol(id, fq_function, SymbolSpec::Func);
2021-10-21 14:46:42 -07:00
}
StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => {
let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone());
self.table.types.register(fq_type, NameSpec { location, kind: TypeKind })?;
2021-10-21 14:46:42 -07:00
}
StatementKind::Declaration(Declaration::Binding { name, .. }) => {
let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone());
self.table
.fq_names
.register(fq_binding.clone(), NameSpec { location, kind: NameKind::Binding })?;
2021-10-24 00:08:26 -07:00
if !function_scope {
self.add_symbol(id, fq_binding, SymbolSpec::GlobalBinding);
2021-10-24 00:08:26 -07:00
}
2021-10-21 14:46:42 -07:00
}
StatementKind::Module(ModuleSpecifier { name, .. }) => {
let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone());
self.table.fq_names.register(fq_module, NameSpec { location, kind: NameKind::Module })?;
2021-10-21 14:46:42 -07:00
}
_ => (),
}
2021-10-21 14:46:42 -07:00
Ok(())
}
fn add_type_members(
&mut self,
type_name: &TypeSingletonName,
type_body: &TypeBody,
_mutable: &bool,
location: Location,
2021-11-02 01:20:30 -07:00
scope_stack: &mut Vec<ScopeSegment>,
2021-10-21 14:46:42 -07:00
) -> Vec<SymbolError> {
let (variants, immediate_variant) = match type_body {
TypeBody::Variants(variants) => (variants.clone(), false),
TypeBody::ImmediateRecord(id, fields) => (
vec![Variant {
id: *id,
name: type_name.name.clone(),
kind: VariantKind::Record(fields.clone()),
}],
true,
),
2021-10-31 02:30:38 -07:00
};
2021-10-29 00:48:44 -07:00
let type_fqsn = Fqsn::from_scope_stack(scope_stack, type_name.name.clone());
2021-11-02 01:20:30 -07:00
let new_scope = ScopeSegment::Name(type_name.name.clone());
scope_stack.push(new_scope);
// Check for duplicates before registering any types with the TypeContext
let mut seen_variants = HashSet::new();
let mut errors = vec![];
2021-10-29 00:48:44 -07:00
2021-10-31 02:30:38 -07:00
for variant in variants.iter() {
if seen_variants.contains(&variant.name) {
errors.push(SymbolError::DuplicateVariant {
2021-10-29 00:48:44 -07:00
type_fqsn: type_fqsn.clone(),
name: variant.name.as_ref().to_string(),
2021-10-29 00:48:44 -07:00
})
}
seen_variants.insert(variant.name.clone());
if let VariantKind::Record(ref members) = variant.kind {
let variant_name = Fqsn::from_scope_stack(scope_stack.as_ref(), variant.name.clone());
let mut seen_members = HashMap::new();
for (member_name, _) in members.iter() {
match seen_members.entry(member_name.as_ref()) {
Entry::Occupied(o) => {
let location = *o.get();
errors.push(SymbolError::DuplicateRecord {
type_name: variant_name.clone(),
location,
member: member_name.as_ref().to_string(),
});
}
//TODO eventually this should track meaningful locations
Entry::Vacant(v) => {
v.insert(location);
}
}
}
}
2021-10-29 00:48:44 -07:00
}
if !errors.is_empty() {
return errors;
2021-10-29 00:48:44 -07:00
}
let mut type_builder = TypeBuilder::new(type_name.name.as_ref());
2021-10-29 00:48:44 -07:00
2021-10-29 17:27:21 -07:00
let mut fqsn_id_map = HashMap::new();
for variant in variants.iter() {
2021-10-29 17:27:21 -07:00
let Variant { name, kind, id } = variant;
fqsn_id_map.insert(Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()), id);
2021-10-29 00:48:44 -07:00
let mut variant_builder = VariantBuilder::new(name.as_ref());
match kind {
VariantKind::UnitStruct => (),
VariantKind::TupleStruct(items) =>
for type_identifier in items {
let pending: PendingType = type_identifier.into();
variant_builder.add_member(pending);
},
VariantKind::Record(members) =>
for (field_name, type_identifier) in members.iter() {
let pending: PendingType = type_identifier.into();
variant_builder.add_record_member(field_name.as_ref(), pending);
},
}
type_builder.add_variant(variant_builder);
}
2021-10-29 00:48:44 -07:00
let type_id = self.type_context.register_type(type_builder);
2021-10-29 17:27:21 -07:00
let type_definition = self.type_context.lookup_type(&type_id).unwrap();
// This index is guaranteed to be the correct tag
for (index, variant) in type_definition.variants.iter().enumerate() {
let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string()));
let id = fqsn_id_map.get(&fqsn).unwrap();
let tag = index as u32;
let spec = match &variant.members {
type_inference::VariantMembers::Unit => SymbolSpec::DataConstructor { tag, type_id },
type_inference::VariantMembers::Tuple(..) => SymbolSpec::DataConstructor { tag, type_id },
2021-11-01 13:46:38 -07:00
type_inference::VariantMembers::Record(..) => SymbolSpec::RecordConstructor { tag, type_id },
};
self.table.add_symbol(id, fqsn, spec);
}
if immediate_variant {
let variant = &type_definition.variants[0];
let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string()));
let id = fqsn_id_map.get(&fqsn).unwrap();
let abbrev_fqsn = Fqsn::from_scope_stack(
scope_stack[0..scope_stack.len() - 1].as_ref(),
Rc::new(variant.name.to_string()),
);
let spec = SymbolSpec::RecordConstructor { tag: 0, type_id };
self.table.add_symbol(id, abbrev_fqsn, spec);
}
2021-10-21 14:46:42 -07:00
scope_stack.pop();
vec![]
2021-10-21 14:46:42 -07:00
}
2019-01-20 00:22:35 -08:00
}