schala/schala-lang/language/src/symbol_table/mod.rs

552 lines
19 KiB
Rust
Raw Normal View History

use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::rc::Rc;
use std::fmt;
use std::fmt::Write;
use crate::tokenizing::{Location, LineNumber};
2019-01-07 13:00:37 -08:00
use crate::ast;
use crate::ast::{ItemId, TypeBody, Variant, TypeSingletonName, Signature, Declaration, Statement, StatementKind, ModuleSpecifier};
2019-01-07 13:00:37 -08:00
use crate::typechecking::TypeName;
2021-10-19 13:48:00 -07:00
mod resolver;
2021-10-14 06:52:50 -07:00
mod tables;
2021-10-14 06:55:57 -07:00
use tables::DeclLocations;
2019-10-17 03:15:39 -07:00
mod symbol_trie;
use symbol_trie::SymbolTrie;
2019-10-16 19:51:43 -07:00
mod test;
/// Fully-qualified symbol name
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
2021-10-19 13:48:00 -07:00
pub struct FQSN {
//TODO FQSN's need to be cheaply cloneable
scopes: Vec<Scope>, //TODO rename to ScopeSegment
}
impl FQSN {
fn from_scope_stack(scopes: &[Scope], new_name: String) -> Self {
let mut v = Vec::new();
for s in scopes {
v.push(s.clone());
}
v.push(Scope::Name(new_name));
FQSN { scopes: v }
}
fn extend(&self, new_name: String) -> Self {
let mut existing = self.scopes.clone();
existing.push(Scope::Name(new_name));
FQSN { scopes: existing }
}
}
//TODO eventually this should use ItemId's to avoid String-cloning
/// One segment within a scope.
///
/// An `FQSN` is a sequence of these segments. In this file a segment is pushed
/// onto the scope stack for each enclosing function, module, or type name.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
enum Scope {
/// A scope segment identified by its owned name string.
Name(String)
}
/// Error returned by `NameTable::register` when a name is registered a second time.
#[derive(Debug, Clone)]
struct DuplicateName {
/// The fully-qualified name that was registered more than once.
prev_name: FQSN,
/// The location stored with the earlier (already present) registration.
location: Location
}
/// The value stored in a `NameTable` for one registered name.
#[derive(Debug)]
struct NameSpec<K> {
/// Source location of the statement that introduced the name.
location: Location,
/// Table-specific classification of the name (`NameKind` or `TypeKind` in this file).
kind: K
}
/// Classification of the entries in the `fq_names` table of `SymbolTable`.
#[derive(Debug)]
enum NameKind {
/// Registered for `StatementKind::Module` statements.
Module,
/// Registered for function signatures and function declarations.
Function,
/// Registered for `Declaration::Binding` declarations.
Binding,
}
/// Marker kind for entries in the `types` table of `SymbolTable`; it carries no data.
#[derive(Debug)]
struct TypeKind;
/// Keeps track of what names were used in a given namespace.
struct NameTable<K> {
table: HashMap<FQSN, NameSpec<K>>
}
impl<K> NameTable<K> {
fn new() -> Self {
Self { table: HashMap::new() }
}
fn register(&mut self, name: FQSN, spec: NameSpec<K>) -> Result<(), DuplicateName> {
match self.table.entry(name.clone()) {
Entry::Occupied(o) => {
Err(DuplicateName { prev_name: name, location: o.get().location })
},
Entry::Vacant(v) => {
v.insert(spec);
Ok(())
}
}
}
}
2019-09-25 02:43:07 -07:00
#[derive(PartialEq, Eq, Hash, Debug, Clone, PartialOrd, Ord)]
2019-08-30 19:03:52 -07:00
pub struct FullyQualifiedSymbolName(pub Vec<ScopeSegment>);
impl fmt::Display for FullyQualifiedSymbolName {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let FullyQualifiedSymbolName(v) = self;
for segment in v {
write!(f, "::{}", segment)?;
}
Ok(())
}
}
2019-09-25 02:43:07 -07:00
/// One named segment of a `FullyQualifiedSymbolName`.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ScopeSegment {
    pub name: Rc<String>, //TODO maybe this could be a &str, for efficiency?
}

impl fmt::Display for ScopeSegment {
    /// Writes the segment's name followed by a (currently always empty) kind suffix.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let kind = ""; //TODO implement some kind of kind-tracking here
        write!(f, "{}{}", self.name, kind)
    }
}

impl ScopeSegment {
    /// Wraps `name` in a segment.
    #[allow(dead_code)]
    pub fn new(name: Rc<String>) -> ScopeSegment {
        ScopeSegment { name }
    }
}
//cf. p. 150 or so of Language Implementation Patterns
pub struct SymbolTable {
2021-10-19 13:48:00 -07:00
decl_locations: DeclLocations, //TODO delete this
symbol_path_to_symbol: HashMap<FullyQualifiedSymbolName, Symbol>,
2019-10-17 03:15:39 -07:00
symbol_trie: SymbolTrie,
2021-10-19 13:48:00 -07:00
/// These tables are responsible for preventing duplicate names.
2021-10-18 22:51:36 -07:00
fq_names: NameTable<NameKind>, //Note that presence of two tables implies that a type and other binding with the same name can co-exist
types: NameTable<TypeKind>,
2021-10-19 13:48:00 -07:00
/// A map of the `ItemId`s of instances of use of names to their fully-canonicalized FQSN form.
id_to_fqsn: HashMap<ItemId, FQSN>,
/// A map of the FQSN of an AST definition to a Symbol data structure, which contains
/// some basic information about what that symbol is and (ideally) references to other tables
/// (e.g. typechecking tables) with more information about that symbol.
fqsn_to_symbol: HashMap<FQSN, Symbol>,
}
impl SymbolTable {
pub fn new() -> SymbolTable {
2019-03-07 23:51:31 -08:00
SymbolTable {
2021-10-14 06:55:57 -07:00
decl_locations: DeclLocations::new(),
symbol_path_to_symbol: HashMap::new(),
symbol_trie: SymbolTrie::new(),
2021-10-19 13:48:00 -07:00
fq_names: NameTable::new(),
types: NameTable::new(),
2021-10-19 13:48:00 -07:00
id_to_fqsn: HashMap::new(),
fqsn_to_symbol: HashMap::new(),
2019-03-07 23:51:31 -08:00
}
}
2018-08-05 18:19:48 -07:00
2021-10-19 13:48:00 -07:00
pub fn lookup_symbol(&self, id: &ItemId) -> Option<&Symbol> {
let fqsn = self.id_to_fqsn.get(id);
fqsn.and_then(|fqsn| self.fqsn_to_symbol.get(fqsn))
}
2019-09-25 02:18:36 -07:00
fn add_new_symbol(&mut self, local_name: &Rc<String>, scope_path: &Vec<ScopeSegment>, spec: SymbolSpec) {
let mut vec: Vec<ScopeSegment> = scope_path.clone();
vec.push(ScopeSegment { name: local_name.clone() });
let fully_qualified_name = FullyQualifiedSymbolName(vec);
2021-10-19 13:48:00 -07:00
let symbol = Symbol { local_name: local_name.clone(), /*fully_qualified_name: fully_qualified_name.clone(),*/ spec };
//self.symbol_trie.insert(&fully_qualified_name);
self.symbol_path_to_symbol.insert(fully_qualified_name, symbol);
}
}
2021-10-13 00:53:32 -07:00
#[allow(dead_code)]
#[derive(Debug)]
pub struct Symbol {
2021-10-14 06:52:50 -07:00
pub local_name: Rc<String>,
2021-10-19 13:48:00 -07:00
//fully_qualified_name: FullyQualifiedSymbolName,
pub spec: SymbolSpec,
}
2018-06-03 23:04:07 -07:00
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "<Local name: {}, Spec: {}>", self.local_name, self.spec)
2018-06-03 23:04:07 -07:00
}
}
#[derive(Debug)]
pub enum SymbolSpec {
2018-06-03 02:39:49 -07:00
Func(Vec<TypeName>),
2018-05-30 23:54:24 -07:00
DataConstructor {
index: usize,
2021-10-19 13:48:00 -07:00
type_name: TypeName, //TODO this eventually needs to be some kind of ID
type_args: Vec<Rc<String>>, //TODO this should be a lookup table into type information, it's not the concern of the symbol table
2018-05-30 23:54:24 -07:00
},
2019-01-25 00:57:01 -08:00
RecordConstructor {
index: usize,
members: HashMap<Rc<String>, TypeName>,
type_name: TypeName,
},
2019-10-16 20:22:40 -07:00
Binding,
Type {
name: TypeName
},
}
2018-06-03 23:04:07 -07:00
impl fmt::Display for SymbolSpec {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::SymbolSpec::*;
match self {
Func(type_names) => write!(f, "Func({:?})", type_names),
DataConstructor { index, type_name, type_args } => write!(f, "DataConstructor(idx: {})({:?} -> {})", index, type_args, type_name),
RecordConstructor { type_name, index, ..} => write!(f, "RecordConstructor(idx: {})(<members> -> {})", index, type_name),
Binding => write!(f, "Binding"),
2019-10-16 20:22:40 -07:00
Type { name } => write!(f, "Type <{}>", name),
2018-06-03 23:04:07 -07:00
}
}
}
impl SymbolTable {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
* later */
2021-10-18 23:41:29 -07:00
/// Main entry point into the symbol table.
///
/// Runs the passes over `ast` in order: first the name tables are populated
/// (which also detects duplicate definitions), then the identifiers used in
/// expressions are resolved. The first failing pass aborts processing and its
/// error message is returned.
pub fn process_ast(&mut self, ast: &ast::AST) -> Result<(), String> {
    self.populate_name_tables(ast)
        .and_then(|()| self.resolve_symbol_ids(ast))
}
2021-10-19 13:48:00 -07:00
/// Walks the AST, matching the ID of an identifier used in some expression to
/// the corresponding Symbol.
2021-10-18 23:41:29 -07:00
fn resolve_symbol_ids(&mut self, ast: &ast::AST) -> Result<(), String> {
2021-10-19 13:48:00 -07:00
let mut resolver = resolver::Resolver::new(self);
resolver.resolve(ast)?;
2021-10-18 23:41:29 -07:00
Ok(())
}
/// This function traverses the AST and adds symbol table entries for
/// constants, functions, types, and modules defined within. This simultaneously
/// checks for dupicate definitions (and returns errors if discovered), and sets
/// up name tables that will be used by further parts of the compiler
2021-10-18 23:41:29 -07:00
fn populate_name_tables(&mut self, ast: &ast::AST) -> Result<(), String> {
2021-10-19 13:48:00 -07:00
let mut scope_stack = vec![];
self.add_from_scope(ast.statements.as_ref(), &mut scope_stack)
.map_err(|err| format!("{:?}", err))?;
Ok(())
}
//TODO this should probably return a vector of duplicate name errors
fn add_from_scope<'a>(&'a mut self, statements: &[Statement], scope_stack: &mut Vec<Scope>) -> Result<(), DuplicateName> {
for statement in statements {
2021-10-19 13:48:00 -07:00
let Statement { id, kind, location } = statement;
let location = *location;
match kind {
StatementKind::Declaration(Declaration::FuncSig(signature)) => {
let fn_name: String = signature.name.as_str().to_owned();
let fq_function = FQSN::from_scope_stack(scope_stack.as_ref(), fn_name);
self.fq_names.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
2021-10-19 13:48:00 -07:00
self.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind } )?;
self.fqsn_to_symbol.insert(fq_function, Symbol {
local_name: signature.name.clone(),
spec: SymbolSpec::Func(vec![]), //TODO does this inner vec need to exist at all?
});
}
StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => {
let fn_name: String = signature.name.as_str().to_owned();
let new_scope = Scope::Name(fn_name.clone());
let fq_function = FQSN::from_scope_stack(scope_stack.as_ref(), fn_name);
self.fq_names.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
2021-10-19 13:48:00 -07:00
self.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind } )?;
self.fqsn_to_symbol.insert(fq_function, Symbol {
local_name: signature.name.clone(),
spec: SymbolSpec::Func(vec![]), //TODO does this inner vec need to exist at all?
});
scope_stack.push(new_scope);
let output = self.add_from_scope(body.as_ref(), scope_stack);
scope_stack.pop();
output?
},
StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) => {
let fq_type = FQSN::from_scope_stack(scope_stack.as_ref(), name.name.as_ref().to_owned());
self.types.register(fq_type, NameSpec { location, kind: TypeKind } )?;
2021-10-19 13:48:00 -07:00
if let Err(errors) = self.add_type_members(id, name, body, mutable, location, scope_stack) {
return Err(errors[0].clone());
}
},
StatementKind::Declaration(Declaration::Binding { name, .. }) => {
let fq_binding = FQSN::from_scope_stack(scope_stack.as_ref(), name.as_str().to_owned());
2021-10-19 13:48:00 -07:00
self.fq_names.register(fq_binding.clone(), NameSpec { location, kind: NameKind::Binding })?;
self.fqsn_to_symbol.insert(fq_binding, Symbol {
local_name: name.clone(),
spec: SymbolSpec::Binding,
});
}
StatementKind::Module(ModuleSpecifier { name, contents }) => {
let mod_name = name.as_str().to_owned();
let fq_module = FQSN::from_scope_stack(scope_stack.as_ref(), mod_name.clone());
let new_scope = Scope::Name(mod_name);
self.fq_names.register(fq_module, NameSpec { location, kind: NameKind::Module })?;
scope_stack.push(new_scope);
let output = self.add_from_scope(contents.as_ref(), scope_stack);
scope_stack.pop();
output?
},
_ => (),
}
}
Ok(())
}
2021-10-19 13:48:00 -07:00
fn add_type_members(&mut self, id: &ItemId, type_name: &TypeSingletonName, type_body: &TypeBody, _mutable: &bool, location: Location, scope_stack: &mut Vec<Scope>) -> Result<(), Vec<DuplicateName>> {
let mut errors = vec![];
2021-10-19 13:48:00 -07:00
let mut register = |fqsn: FQSN, spec: SymbolSpec| {
let name_spec = NameSpec { location, kind: TypeKind };
if let Err(err) = self.types.register(fqsn.clone(), name_spec) {
errors.push(err);
2021-10-19 13:48:00 -07:00
} else {
let local_name = match spec {
SymbolSpec::DataConstructor { ref type_name, ..} | SymbolSpec::RecordConstructor { ref type_name, .. } => type_name.clone(),
_ => panic!("This should never happen"),
};
let symbol = Symbol { local_name, spec };
self.fqsn_to_symbol.insert(fqsn, symbol);
};
};
let TypeBody(variants) = type_body;
let new_scope = Scope::Name(type_name.name.as_ref().to_owned());
scope_stack.push(new_scope);
2021-10-19 13:48:00 -07:00
for (index, variant) in variants.iter().enumerate() {
match variant {
2021-10-19 13:48:00 -07:00
Variant::UnitStruct(name) => {
let fq_name = FQSN::from_scope_stack(scope_stack.as_ref(), name.as_ref().to_owned());
let spec = SymbolSpec::DataConstructor {
index,
type_name: name.clone(),
type_args: vec![],
};
register(fq_name, spec);
},
Variant::TupleStruct(name, items) => {
let fq_name = FQSN::from_scope_stack(scope_stack.as_ref(), name.as_ref().to_owned());
2021-10-19 13:48:00 -07:00
let spec = SymbolSpec::DataConstructor {
index,
type_name: name.clone(),
type_args: items.iter().map(|_| Rc::new("DUMMY_TYPE_ID".to_string())).collect()
};
register(fq_name, spec);
},
Variant::Record { name, members } => {
let fq_name = FQSN::from_scope_stack(scope_stack.as_ref(), name.as_ref().to_owned());
2021-10-19 13:48:00 -07:00
let spec = SymbolSpec::RecordConstructor {
index,
type_name: name.clone(),
members: members.iter()
.map(|(_, _)| (Rc::new("DUMMY_FIELD".to_string()), Rc::new("DUMMY_TYPE_ID".to_string()))).collect()
};
register(fq_name, spec);
//TODO check for duplicates among struct member definitions
/*
let mut duplicate_member_definitions = Vec::new();
for (member_name, member_type) in defined_members {
match members.entry(member_name.clone()) {
Entry::Occupied(_) => duplicate_member_definitions.push(member_name.clone()),
Entry::Vacant(v) => {
v.insert(match member_type {
TypeIdentifier::Singleton(TypeSingletonName { name, ..}) => name.clone(),
TypeIdentifier::Tuple(_) => unimplemented!(),
});
}
}
}
if duplicate_member_definitions.len() != 0 {
return Err(format!("Duplicate member(s) in definition of type {}: {:?}", type_name, duplicate_member_definitions));
}
2021-10-19 13:48:00 -07:00
*/
}
}
}
scope_stack.pop();
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
2018-06-04 19:25:40 -07:00
/// Populates the older `FullyQualifiedSymbolName`-keyed symbol table from `ast`,
/// starting at the root scope.
///
/// # Errors
///
/// Returns the message of the first failure reported while walking the AST.
pub fn add_top_level_symbols(&mut self, ast: &ast::AST) -> Result<(), String> {
    let mut scope_name_stack = Vec::new();
    self.add_symbols_from_scope(&ast.statements, &mut scope_name_stack)
}
fn add_symbols_from_scope<'a>(&'a mut self, statements: &Vec<Statement>, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
2018-06-04 19:25:40 -07:00
use self::ast::Declaration::*;
for statement in statements.iter() {
match statement {
Statement { kind: StatementKind::Declaration(decl), id, location, } => {
2021-10-14 06:55:57 -07:00
self.decl_locations.add_location(id, *location);
match decl {
FuncSig(ref signature) => {
self.add_function_signature(signature, scope_name_stack)?
}
FuncDecl(ref signature, ref body) => {
self.add_function_signature(signature, scope_name_stack)?;
scope_name_stack.push(ScopeSegment{
name: signature.name.clone(),
});
let output = self.add_symbols_from_scope(body, scope_name_stack);
2019-10-24 02:13:07 -07:00
scope_name_stack.pop();
output?
},
TypeDecl { name, body, mutable } => {
self.add_type_decl(name, body, mutable, scope_name_stack)?
},
Binding { name, .. } => {
self.add_new_symbol(name, scope_name_stack, SymbolSpec::Binding);
}
_ => ()
}
},
Statement { kind: StatementKind::Module(ModuleSpecifier { name, contents}), id, location } => {
2021-10-14 06:55:57 -07:00
self.decl_locations.add_location(id, *location);
2019-10-24 02:13:07 -07:00
scope_name_stack.push(ScopeSegment { name: name.clone() });
let output = self.add_symbols_from_scope(contents, scope_name_stack);
scope_name_stack.pop();
output?
},
_ => ()
}
}
Ok(())
}
#[allow(dead_code)]
pub fn debug_symbol_table(&self) -> String {
2021-10-14 06:52:50 -07:00
let mut output = "Symbol table\n".to_string();
2019-09-25 02:43:07 -07:00
let mut sorted_symbols: Vec<(&FullyQualifiedSymbolName, &Symbol)> = self.symbol_path_to_symbol.iter().collect();
sorted_symbols.sort_by(|(fqsn, _), (other_fqsn, _)| fqsn.cmp(other_fqsn));
for (name, sym) in sorted_symbols.iter() {
write!(output, "{} -> {}\n", name, sym).unwrap();
}
output
}
2019-01-20 00:22:35 -08:00
fn add_function_signature(&mut self, signature: &Signature, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
2019-01-20 00:22:35 -08:00
let mut local_type_context = LocalTypeContext::new();
2019-06-16 14:56:52 -07:00
let types = signature.params.iter().map(|param| match param.anno {
Some(ref type_identifier) => Rc::new(format!("{:?}", type_identifier)),
None => local_type_context.new_universal_type()
2019-01-20 00:22:35 -08:00
}).collect();
self.add_new_symbol(&signature.name, scope_name_stack, SymbolSpec::Func(types));
2019-01-20 22:13:05 -08:00
Ok(())
}
2019-08-12 11:27:16 -07:00
//TODO handle type mutability
fn add_type_decl(&mut self, type_name: &TypeSingletonName, body: &TypeBody, _mutable: &bool, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
use crate::ast::{TypeIdentifier};
2019-01-20 22:13:05 -08:00
let TypeBody(variants) = body;
let ref type_name = type_name.name;
2019-10-16 20:22:40 -07:00
let type_spec = SymbolSpec::Type {
name: type_name.clone(),
};
self.add_new_symbol(type_name, &scope_name_stack, type_spec);
2019-09-10 03:31:23 -07:00
scope_name_stack.push(ScopeSegment{
name: type_name.clone(),
});
2019-01-20 22:13:05 -08:00
//TODO figure out why _params isn't being used here
for (index, var) in variants.iter().enumerate() {
match var {
Variant::UnitStruct(variant_name) => {
let spec = SymbolSpec::DataConstructor {
index,
type_name: type_name.clone(),
2019-01-20 22:13:05 -08:00
type_args: vec![],
};
self.add_new_symbol(variant_name, scope_name_stack, spec);
2019-01-20 22:13:05 -08:00
},
Variant::TupleStruct(variant_name, tuple_members) => {
//TODO fix the notion of a tuple type
2019-01-20 22:13:05 -08:00
let type_args = tuple_members.iter().map(|type_name| match type_name {
TypeIdentifier::Singleton(TypeSingletonName { name, ..}) => name.clone(),
TypeIdentifier::Tuple(_) => unimplemented!(),
}).collect();
let spec = SymbolSpec::DataConstructor {
index,
type_name: type_name.clone(),
2019-01-20 22:13:05 -08:00
type_args
};
self.add_new_symbol(variant_name, scope_name_stack, spec);
2019-01-20 22:13:05 -08:00
},
Variant::Record { name, members: defined_members } => {
let mut members = HashMap::new();
let mut duplicate_member_definitions = Vec::new();
for (member_name, member_type) in defined_members {
match members.entry(member_name.clone()) {
Entry::Occupied(_) => duplicate_member_definitions.push(member_name.clone()),
Entry::Vacant(v) => {
v.insert(match member_type {
TypeIdentifier::Singleton(TypeSingletonName { name, ..}) => name.clone(),
TypeIdentifier::Tuple(_) => unimplemented!(),
});
}
}
}
if duplicate_member_definitions.len() != 0 {
return Err(format!("Duplicate member(s) in definition of type {}: {:?}", type_name, duplicate_member_definitions));
}
let spec = SymbolSpec::RecordConstructor { index, type_name: type_name.clone(), members };
self.add_new_symbol(name, scope_name_stack, spec);
2019-01-25 00:57:01 -08:00
},
2019-01-20 22:13:05 -08:00
}
}
2019-09-10 03:31:23 -07:00
scope_name_stack.pop();
2019-01-20 22:13:05 -08:00
Ok(())
2019-01-20 00:22:35 -08:00
}
}
/// Hands out fresh placeholder type names (`a`, `b`, `c`, ...) for parameters that
/// carry no type annotation.
struct LocalTypeContext {
    /// Number of names handed out so far.
    state: usize
}

impl LocalTypeContext {
    /// Creates a context whose first generated name is `a`.
    fn new() -> LocalTypeContext {
        LocalTypeContext { state: 0 }
    }

    /// Returns the next unused placeholder name.
    ///
    /// The first 26 names are the single letters `a` to `z`. After that the
    /// alphabet is reused with a numeric suffix (`a1` ... `z1`, `a2`, ...), so names
    /// stay unique and alphabetic no matter how many are requested, with no
    /// arithmetic overflow.
    fn new_universal_type(&mut self) -> TypeName {
        const ALPHABET: &[u8; 26] = b"abcdefghijklmnopqrstuvwxyz";

        let n = self.state;
        self.state += 1;

        let letter = char::from(ALPHABET[n % ALPHABET.len()]);
        let round = n / ALPHABET.len();
        let name = if round == 0 {
            letter.to_string()
        } else {
            format!("{}{}", letter, round)
        };
        Rc::new(name)
    }
}
2019-03-07 23:51:31 -08:00