schala/schala-lang/language/src/symbol_table/mod.rs

602 lines
22 KiB
Rust

#![allow(clippy::enum_variant_names)]
use std::{
collections::{hash_map::Entry, HashMap, HashSet},
fmt,
rc::Rc,
str::FromStr,
};
use crate::{
ast,
ast::{
Declaration, Expression, ExpressionKind, ItemId, ModuleSpecifier, Statement, StatementKind, TypeBody,
TypeSingletonName, Variant, VariantKind,
},
builtin::Builtin,
tokenizing::Location,
type_inference::{self, PendingType, TypeBuilder, TypeContext, TypeId, VariantBuilder},
};
mod resolver;
mod symbol_trie;
use symbol_trie::SymbolTrie;
mod test;
use crate::identifier::{define_id_kind, Id, IdStore};
define_id_kind!(DefItem);
pub type DefId = Id<DefItem>;
/// Fully-qualified symbol name
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct Fqsn {
//TODO Fqsn's need to be cheaply cloneable
scopes: Vec<ScopeSegment>,
}
impl Fqsn {
fn from_scope_stack(scopes: &[ScopeSegment], new_name: Rc<String>) -> Self {
let mut v = Vec::new();
for s in scopes {
v.push(s.clone());
}
v.push(ScopeSegment::Name(new_name));
Fqsn { scopes: v }
}
#[allow(dead_code)]
fn from_strs(strs: &[&str]) -> Fqsn {
let mut scopes = vec![];
for s in strs {
scopes.push(ScopeSegment::Name(Rc::new(s.to_string())));
}
Fqsn { scopes }
}
fn last_elem(&self) -> Rc<String> {
let ScopeSegment::Name(name) = self.scopes.last().unwrap();
name.clone()
}
}
impl fmt::Display for Fqsn {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let delim = "::";
let Fqsn { scopes } = self;
write!(f, "FQSN<{}", scopes[0])?;
for item in scopes[1..].iter() {
write!(f, "{}{}", delim, item)?;
}
write!(f, ">")
}
}
//TODO eventually this should use ItemId's to avoid String-cloning
/// One segment within a scope.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
enum ScopeSegment {
Name(Rc<String>),
}
impl fmt::Display for ScopeSegment {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let ScopeSegment::Name(name) = self;
write!(f, "{}", name)
}
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub enum SymbolError {
DuplicateName { prev_name: Fqsn, location: Location },
DuplicateVariant { type_fqsn: Fqsn, name: String },
DuplicateRecord { type_name: Fqsn, location: Location, member: String },
UnknownAnnotation { name: String },
BadAnnotation { name: String, msg: String },
}
#[allow(dead_code)]
#[derive(Debug)]
struct NameSpec<K> {
location: Location,
kind: K,
}
#[derive(Debug)]
enum NameKind {
Module,
Function,
Binding,
}
#[derive(Debug)]
struct TypeKind;
/// Keeps track of what names were used in a given namespace.
struct NameTable<K> {
table: HashMap<Fqsn, NameSpec<K>>,
}
impl<K> NameTable<K> {
fn new() -> Self {
Self { table: HashMap::new() }
}
fn register(&mut self, name: Fqsn, spec: NameSpec<K>) -> Result<(), SymbolError> {
match self.table.entry(name.clone()) {
Entry::Occupied(o) =>
Err(SymbolError::DuplicateName { prev_name: name, location: o.get().location }),
Entry::Vacant(v) => {
v.insert(spec);
Ok(())
}
}
}
}
//cf. p. 150 or so of Language Implementation Patterns
pub struct SymbolTable {
def_id_store: IdStore<DefItem>,
/// Used for import resolution.
symbol_trie: SymbolTrie,
/// These tables are responsible for preventing duplicate names.
fq_names: NameTable<NameKind>, //Note that presence of two tables implies that a type and other binding with the same name can co-exist
types: NameTable<TypeKind>,
id_to_def: HashMap<ItemId, DefId>,
def_to_symbol: HashMap<DefId, Rc<Symbol>>,
}
impl SymbolTable {
pub fn new() -> SymbolTable {
let mut table = SymbolTable {
def_id_store: IdStore::new(),
symbol_trie: SymbolTrie::new(),
fq_names: NameTable::new(),
types: NameTable::new(),
id_to_def: HashMap::new(),
def_to_symbol: HashMap::new(),
};
table.populate_builtins();
table
}
/// The main entry point into the symbol table. This will traverse the AST in several
/// different ways and populate subtables with information that will be used further in the
/// compilation process.
pub fn process_ast(
&mut self,
ast: &ast::AST,
type_context: &mut TypeContext,
) -> Result<(), Vec<SymbolError>> {
let mut runner = SymbolTableRunner { type_context, table: self };
let errs = runner.populate_name_tables(ast);
if !errs.is_empty() {
return Err(errs);
}
runner.resolve_scopes(ast);
Ok(())
}
pub fn lookup_symbol(&self, id: &ItemId) -> Option<&Symbol> {
let def = self.id_to_def.get(id)?;
self.def_to_symbol.get(def).map(|s| s.as_ref())
}
pub fn lookup_symbol_by_def(&self, def: &DefId) -> Option<&Symbol> {
self.def_to_symbol.get(def).map(|s| s.as_ref())
}
#[allow(dead_code)]
pub fn debug(&self) {
println!("Symbol table:");
println!("----------------");
for (id, def) in self.id_to_def.iter() {
if let Some(symbol) = self.def_to_symbol.get(def) {
println!("{} => {}: {}", id, def, symbol);
} else {
println!("{} => {} <NO SYMBOL FOUND>", id, def);
}
}
}
/// Register a new mapping of a fully-qualified symbol name (e.g. `Option::Some`)
/// to a Symbol, a descriptor of what that name refers to.
fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) {
let def_id = self.def_id_store.fresh();
let symbol = Rc::new(Symbol { fully_qualified_name: fqsn.clone(), spec, def_id });
self.symbol_trie.insert(&fqsn, def_id);
self.id_to_def.insert(*id, def_id);
self.def_to_symbol.insert(def_id, symbol);
}
fn populate_single_builtin(&mut self, fqsn: Fqsn, builtin: Builtin) {
let def_id = self.def_id_store.fresh();
let spec = SymbolSpec::Builtin(builtin);
let symbol = Rc::new(Symbol { fully_qualified_name: fqsn.clone(), spec, def_id });
self.symbol_trie.insert(&fqsn, def_id);
self.def_to_symbol.insert(def_id, symbol);
}
fn populate_builtins(&mut self) {
/*
let fqsn = Fqsn::from_strs(&["println"]);
self.populate_single_builtin(fqsn, Builtin::IOPrintLn);
let fqsn = Fqsn::from_strs(&["print"]);
self.populate_single_builtin(fqsn, Builtin::IOPrint);
*/
}
}
struct SymbolTableRunner<'a> {
type_context: &'a mut TypeContext,
table: &'a mut SymbolTable,
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct Symbol {
fully_qualified_name: Fqsn,
spec: SymbolSpec,
def_id: DefId,
}
impl Symbol {
pub fn local_name(&self) -> Rc<String> {
self.fully_qualified_name.last_elem()
}
pub fn def_id(&self) -> Option<DefId> {
Some(self.def_id)
}
pub fn spec(&self) -> SymbolSpec {
self.spec.clone()
}
}
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "<Local name: {}, {}, Spec: {}>", self.local_name(), self.fully_qualified_name, self.spec)
}
}
//TODO - I think I eventually want to draw a distinction between true global items
//i.e. global vars, and items whose definitions are scoped. Right now there's a sense
//in which Func, DataConstructor, RecordConstructor, and GlobalBinding are "globals",
//whereas LocalVarible and FunctionParam have local scope. But right now, they all
//get put into a common table, and all get DefId's from a common source.
//
//It would be good if individual functions could in parallel look up their own
//local vars without interfering with other lookups. Also some type definitions
//should be scoped in a similar way.
//
//Also it makes sense that non-globals should not use DefId's, particularly not
//function parameters (even though they are currently assigned).
#[derive(Debug, Clone)]
pub enum SymbolSpec {
Builtin(Builtin),
Func,
DataConstructor { tag: u32, type_id: TypeId },
RecordConstructor { tag: u32, type_id: TypeId },
GlobalBinding, //Only for global variables, not for function-local ones or ones within a `let` scope context
LocalVariable,
FunctionParam(u8),
}
impl fmt::Display for SymbolSpec {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::SymbolSpec::*;
match self {
Builtin(b) => write!(f, "Builtin: {:?}", b),
Func => write!(f, "Func"),
DataConstructor { tag, type_id } => write!(f, "DataConstructor(tag: {}, type: {})", tag, type_id),
RecordConstructor { type_id, tag, .. } =>
write!(f, "RecordConstructor(tag: {})(<members> -> {})", tag, type_id),
GlobalBinding => write!(f, "GlobalBinding"),
LocalVariable => write!(f, "Local variable"),
FunctionParam(n) => write!(f, "Function param: {}", n),
}
}
}
impl<'a> SymbolTableRunner<'a> {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
* later */
fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) {
self.table.add_symbol(id, fqsn, spec)
}
/// Walks the AST, matching the ID of an identifier used in some expression to
/// the corresponding Symbol.
fn resolve_scopes(&mut self, ast: &ast::AST) {
let mut resolver = resolver::ScopeResolver::new(self.table);
resolver.resolve(ast);
}
/// This function traverses the AST and adds symbol table entries for
/// constants, functions, types, and modules defined within. This simultaneously
/// checks for dupicate definitions (and returns errors if discovered), and sets
/// up name tables that will be used by further parts of the compiler
fn populate_name_tables(&mut self, ast: &ast::AST) -> Vec<SymbolError> {
let mut scope_stack = vec![];
self.add_from_scope(ast.statements.as_ref(), &mut scope_stack, false)
}
fn add_from_scope(
&mut self,
statements: &[Statement],
scope_stack: &mut Vec<ScopeSegment>,
function_scope: bool,
) -> Vec<SymbolError> {
let mut errors = vec![];
for statement in statements {
let Statement { id, kind, location } = statement;
let location = *location;
if let Err(err) = self.add_single_statement(id, kind, location, scope_stack, function_scope) {
errors.push(err);
} else {
// If there's an error with a name, don't recurse into subscopes of that name
let recursive_errs = match kind {
StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => {
let new_scope = ScopeSegment::Name(signature.name.clone());
scope_stack.push(new_scope);
let output = self.add_from_scope(body.as_ref(), scope_stack, true);
scope_stack.pop();
output
}
StatementKind::Module(ModuleSpecifier { name, contents }) => {
let new_scope = ScopeSegment::Name(name.clone());
scope_stack.push(new_scope);
let output = self.add_from_scope(contents.as_ref(), scope_stack, false);
scope_stack.pop();
output
}
StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) =>
self.add_type_members(name, body, mutable, location, scope_stack),
_ => vec![],
};
errors.extend(recursive_errs.into_iter());
}
}
errors
}
fn add_single_statement(
&mut self,
id: &ItemId,
kind: &StatementKind,
location: Location,
scope_stack: &[ScopeSegment],
function_scope: bool,
) -> Result<(), SymbolError> {
match kind {
StatementKind::Declaration(Declaration::FuncSig(signature)) => {
let fq_function = Fqsn::from_scope_stack(scope_stack, signature.name.clone());
self.table
.fq_names
.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?;
self.add_symbol(id, fq_function, SymbolSpec::Func);
}
StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => {
let fn_name = &signature.name;
let fq_function = Fqsn::from_scope_stack(scope_stack, fn_name.clone());
self.table
.fq_names
.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?;
self.add_symbol(id, fq_function, SymbolSpec::Func);
}
StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => {
let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone());
self.table.types.register(fq_type, NameSpec { location, kind: TypeKind })?;
}
StatementKind::Declaration(Declaration::Binding { name, .. }) => {
let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone());
self.table
.fq_names
.register(fq_binding.clone(), NameSpec { location, kind: NameKind::Binding })?;
if !function_scope {
self.add_symbol(id, fq_binding, SymbolSpec::GlobalBinding);
}
}
StatementKind::Module(ModuleSpecifier { name, .. }) => {
let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone());
self.table.fq_names.register(fq_module, NameSpec { location, kind: NameKind::Module })?;
}
StatementKind::Declaration(Declaration::Annotation { name, arguments, inner }) => {
let inner = inner.as_ref();
self.add_single_statement(
&inner.id,
&inner.kind,
inner.location,
scope_stack,
function_scope,
)?;
self.process_annotation(name.as_ref(), arguments.as_slice(), scope_stack, inner)?;
}
_ => (),
}
Ok(())
}
fn process_annotation(
&mut self,
name: &str,
arguments: &[Expression],
scope_stack: &[ScopeSegment],
inner: &Statement,
) -> Result<(), SymbolError> {
println!("handling annotation: {}", name);
if name == "register_builtin" {
if let Statement {
id: _,
location: _,
kind: StatementKind::Declaration(Declaration::FuncDecl(sig, _)),
} = inner
{
let fqsn = Fqsn::from_scope_stack(scope_stack, sig.name.clone());
let builtin_name = match arguments {
[Expression { kind: ExpressionKind::Value(qname), .. }]
if qname.components.len() == 1 =>
qname.components[0].clone(),
_ =>
return Err(SymbolError::BadAnnotation {
name: name.to_string(),
msg: "Bad argument for register_builtin".to_string(),
}),
};
let builtin =
Builtin::from_str(builtin_name.as_str()).map_err(|_| SymbolError::BadAnnotation {
name: name.to_string(),
msg: format!("Invalid builtin: {}", builtin_name),
})?;
self.table.populate_single_builtin(fqsn, builtin);
Ok(())
} else {
Err(SymbolError::BadAnnotation {
name: name.to_string(),
msg: "register_builtin not annotating a function".to_string(),
})
}
} else {
Err(SymbolError::UnknownAnnotation { name: name.to_string() })
}
}
fn add_type_members(
&mut self,
type_name: &TypeSingletonName,
type_body: &TypeBody,
_mutable: &bool,
location: Location,
scope_stack: &mut Vec<ScopeSegment>,
) -> Vec<SymbolError> {
let (variants, immediate_variant) = match type_body {
TypeBody::Variants(variants) => (variants.clone(), false),
TypeBody::ImmediateRecord(id, fields) => (
vec![Variant {
id: *id,
name: type_name.name.clone(),
kind: VariantKind::Record(fields.clone()),
}],
true,
),
};
let type_fqsn = Fqsn::from_scope_stack(scope_stack, type_name.name.clone());
let new_scope = ScopeSegment::Name(type_name.name.clone());
scope_stack.push(new_scope);
// Check for duplicates before registering any types with the TypeContext
let mut seen_variants = HashSet::new();
let mut errors = vec![];
for variant in variants.iter() {
if seen_variants.contains(&variant.name) {
errors.push(SymbolError::DuplicateVariant {
type_fqsn: type_fqsn.clone(),
name: variant.name.as_ref().to_string(),
})
}
seen_variants.insert(variant.name.clone());
if let VariantKind::Record(ref members) = variant.kind {
let variant_name = Fqsn::from_scope_stack(scope_stack.as_ref(), variant.name.clone());
let mut seen_members = HashMap::new();
for (member_name, _) in members.iter() {
match seen_members.entry(member_name.as_ref()) {
Entry::Occupied(o) => {
let location = *o.get();
errors.push(SymbolError::DuplicateRecord {
type_name: variant_name.clone(),
location,
member: member_name.as_ref().to_string(),
});
}
//TODO eventually this should track meaningful locations
Entry::Vacant(v) => {
v.insert(location);
}
}
}
}
}
if !errors.is_empty() {
return errors;
}
let mut type_builder = TypeBuilder::new(type_name.name.as_ref());
let mut fqsn_id_map = HashMap::new();
for variant in variants.iter() {
let Variant { name, kind, id } = variant;
fqsn_id_map.insert(Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()), id);
let mut variant_builder = VariantBuilder::new(name.as_ref());
match kind {
VariantKind::UnitStruct => (),
VariantKind::TupleStruct(items) =>
for type_identifier in items {
let pending: PendingType = type_identifier.into();
variant_builder.add_member(pending);
},
VariantKind::Record(members) =>
for (field_name, type_identifier) in members.iter() {
let pending: PendingType = type_identifier.into();
variant_builder.add_record_member(field_name.as_ref(), pending);
},
}
type_builder.add_variant(variant_builder);
}
let type_id = self.type_context.register_type(type_builder);
let type_definition = self.type_context.lookup_type(&type_id).unwrap();
// This index is guaranteed to be the correct tag
for (index, variant) in type_definition.variants.iter().enumerate() {
let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string()));
let id = fqsn_id_map.get(&fqsn).unwrap();
let tag = index as u32;
let spec = match &variant.members {
type_inference::VariantMembers::Unit => SymbolSpec::DataConstructor { tag, type_id },
type_inference::VariantMembers::Tuple(..) => SymbolSpec::DataConstructor { tag, type_id },
type_inference::VariantMembers::Record(..) => SymbolSpec::RecordConstructor { tag, type_id },
};
self.table.add_symbol(id, fqsn, spec);
}
if immediate_variant {
let variant = &type_definition.variants[0];
let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string()));
let id = fqsn_id_map.get(&fqsn).unwrap();
let abbrev_fqsn = Fqsn::from_scope_stack(
scope_stack[0..scope_stack.len() - 1].as_ref(),
Rc::new(variant.name.to_string()),
);
let spec = SymbolSpec::RecordConstructor { tag: 0, type_id };
self.table.add_symbol(id, abbrev_fqsn, spec);
}
scope_stack.pop();
vec![]
}
}