schala/schala-lang/language/src/symbol_table/mod.rs

525 lines
18 KiB
Rust

use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::rc::Rc;
use std::fmt;
use std::fmt::Write;
use crate::tokenizing::{Location, LineNumber};
use crate::ast;
use crate::ast::{ItemId, TypeBody, Variant, TypeSingletonName, Signature, Declaration, Statement, StatementKind, ModuleSpecifier};
use crate::typechecking::TypeName;
#[allow(unused_macros)]
macro_rules! fqsn {
( $( $name:expr ; $kind:tt),* ) => {
{
let mut vec = vec![];
$(
vec.push(crate::symbol_table::ScopeSegment::new(std::rc::Rc::new($name.to_string())));
)*
FullyQualifiedSymbolName(vec)
}
};
}
mod tables;
use tables::DeclLocations;
mod symbol_trie;
use symbol_trie::SymbolTrie;
mod test;
/// Fully-qualified symbol name
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
struct FQSN {
scopes: Vec<Scope>,
}
impl FQSN {
fn from_scope_stack(scopes: &[Scope], new_name: String) -> Self {
let mut v = Vec::new();
for s in scopes {
v.push(s.clone());
}
v.push(Scope::Name(new_name));
FQSN { scopes: v }
}
fn extend(&self, new_name: String) -> Self {
let mut existing = self.scopes.clone();
existing.push(Scope::Name(new_name));
FQSN { scopes: existing }
}
}
//TODO eventually this should use ItemId's to avoid String-cloning
/// One segment within a scope.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
enum Scope {
Top,
Name(String)
}
#[derive(Debug, Clone)]
struct DuplicateName {
prev_name: FQSN,
location: Location
}
//TODO should map to a Spec* type that has Location and Kind of namespace entry
//tht way I don't need as many tables
/// Keeps track of what names were used in a given namespace.
struct NameTable {
table: HashMap<FQSN, Location>
}
impl NameTable {
fn new() -> Self {
Self { table: HashMap::new() }
}
fn register(&mut self, name: FQSN, location: Location) -> Result<(), DuplicateName> {
match self.table.entry(name.clone()) {
Entry::Occupied(o) => {
Err(DuplicateName { prev_name: name, location: *o.get() })
},
Entry::Vacant(v) => {
v.insert(location);
Ok(())
}
}
}
}
/// Keeps track of what names were used in a given namespace. Call try_register to add a name to
/// the table, or report an error if a name already exists.
struct DuplicateNameTrackTable {
table: HashMap<Rc<String>, LineNumber>,
}
impl DuplicateNameTrackTable {
fn new() -> DuplicateNameTrackTable {
DuplicateNameTrackTable { table: HashMap::new() }
}
fn try_register(&mut self, name: &Rc<String>, id: &ItemId, decl_locations: &DeclLocations) -> Result<(), LineNumber> {
match self.table.entry(name.clone()) {
Entry::Occupied(o) => {
let line_number = o.get();
Err(*line_number)
},
Entry::Vacant(v) => {
let line_number = if let Some(loc) = decl_locations.lookup(id) {
loc.line_num
} else {
0
};
v.insert(line_number);
Ok(())
}
}
}
}
#[derive(PartialEq, Eq, Hash, Debug, Clone, PartialOrd, Ord)]
pub struct FullyQualifiedSymbolName(pub Vec<ScopeSegment>);
impl fmt::Display for FullyQualifiedSymbolName {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let FullyQualifiedSymbolName(v) = self;
for segment in v {
write!(f, "::{}", segment)?;
}
Ok(())
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ScopeSegment {
pub name: Rc<String>, //TODO maybe this could be a &str, for efficiency?
}
impl fmt::Display for ScopeSegment {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let kind = ""; //TODO implement some kind of kind-tracking here
write!(f, "{}{}", self.name, kind)
}
}
impl ScopeSegment {
#[allow(dead_code)]
pub fn new(name: Rc<String>) -> ScopeSegment {
ScopeSegment { name }
}
}
//cf. p. 150 or so of Language Implementation Patterns
pub struct SymbolTable {
decl_locations: DeclLocations,
symbol_path_to_symbol: HashMap<FullyQualifiedSymbolName, Symbol>,
id_to_fqsn: HashMap<ItemId, FullyQualifiedSymbolName>,
symbol_trie: SymbolTrie,
functions: NameTable, //TODO maybe bindings and functions should be the same table?
types: NameTable,
bindings: NameTable, //TODO NameTable should be a trie to facilitate quick lookups
}
impl SymbolTable {
pub fn new() -> SymbolTable {
SymbolTable {
decl_locations: DeclLocations::new(),
symbol_path_to_symbol: HashMap::new(),
id_to_fqsn: HashMap::new(),
symbol_trie: SymbolTrie::new(),
functions: NameTable::new(),
types: NameTable::new(),
bindings: NameTable::new(),
}
}
pub fn map_id_to_fqsn(&mut self, id: &ItemId, fqsn: FullyQualifiedSymbolName) {
self.id_to_fqsn.insert(id.clone(), fqsn);
}
pub fn get_fqsn_from_id(&self, id: &ItemId) -> Option<FullyQualifiedSymbolName> {
self.id_to_fqsn.get(&id).cloned()
}
fn add_new_symbol(&mut self, local_name: &Rc<String>, scope_path: &Vec<ScopeSegment>, spec: SymbolSpec) {
let mut vec: Vec<ScopeSegment> = scope_path.clone();
vec.push(ScopeSegment { name: local_name.clone() });
let fully_qualified_name = FullyQualifiedSymbolName(vec);
let symbol = Symbol { local_name: local_name.clone(), fully_qualified_name: fully_qualified_name.clone(), spec };
self.symbol_trie.insert(&fully_qualified_name);
self.symbol_path_to_symbol.insert(fully_qualified_name, symbol);
}
pub fn lookup_by_fqsn(&self, fully_qualified_path: &FullyQualifiedSymbolName) -> Option<&Symbol> {
self.symbol_path_to_symbol.get(fully_qualified_path)
}
pub fn lookup_children_of_fqsn(&self, path: &FullyQualifiedSymbolName) -> Vec<FullyQualifiedSymbolName> {
self.symbol_trie.get_children(path)
}
}
#[allow(dead_code)]
#[derive(Debug)]
pub struct Symbol {
pub local_name: Rc<String>,
fully_qualified_name: FullyQualifiedSymbolName,
pub spec: SymbolSpec,
}
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "<Local name: {}, Spec: {}>", self.local_name, self.spec)
}
}
#[derive(Debug)]
pub enum SymbolSpec {
Func(Vec<TypeName>),
DataConstructor {
index: usize,
type_name: TypeName,
type_args: Vec<Rc<String>>,
},
RecordConstructor {
index: usize,
members: HashMap<Rc<String>, TypeName>,
type_name: TypeName,
},
Binding,
Type {
name: TypeName
},
}
impl fmt::Display for SymbolSpec {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::SymbolSpec::*;
match self {
Func(type_names) => write!(f, "Func({:?})", type_names),
DataConstructor { index, type_name, type_args } => write!(f, "DataConstructor(idx: {})({:?} -> {})", index, type_args, type_name),
RecordConstructor { type_name, index, ..} => write!(f, "RecordConstructor(idx: {})(<members> -> {})", index, type_name),
Binding => write!(f, "Binding"),
Type { name } => write!(f, "Type <{}>", name),
}
}
}
impl SymbolTable {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
* later */
/// This function traverses the AST and adds symbol table entries for
/// constants, functions, types, and modules defined within. This simultaneously
/// checks for dupicate definitions (and returns errors if discovered), and sets
/// up name tables that will be used by further parts of the compiler
pub fn process_ast(&mut self, ast: &ast::AST) -> Result<(), String> {
let mut scope_stack = vec![Scope::Top];
self.add_from_scope(ast.statements.as_ref(), &mut scope_stack)
.map_err(|err| format!("{:?}", err))?;
Ok(())
}
//TODO this should probably return a vector of duplicate name errors
fn add_from_scope<'a>(&'a mut self, statements: &[Statement], scope_stack: &mut Vec<Scope>) -> Result<(), DuplicateName> {
for statement in statements {
let Statement { id: _, kind, location } = statement;
let location = *location;
match kind {
StatementKind::Declaration(Declaration::FuncSig(signature)) => {
let fn_name: String = signature.name.as_str().to_owned();
let fq_function = FQSN::from_scope_stack(scope_stack.as_ref(), fn_name);
self.functions.register(fq_function.clone(), location)?;
self.types.register(fq_function, location)?;
}
StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => {
let fn_name: String = signature.name.as_str().to_owned();
let new_scope = Scope::Name(fn_name.clone());
let fq_function = FQSN::from_scope_stack(scope_stack.as_ref(), fn_name);
self.functions.register(fq_function.clone(), location)?;
self.types.register(fq_function, location)?;
scope_stack.push(new_scope);
let output = self.add_from_scope(body.as_ref(), scope_stack);
scope_stack.pop();
output?
},
StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) => {
let fq_type = FQSN::from_scope_stack(scope_stack.as_ref(), name.name.as_ref().to_owned());
self.types.register(fq_type, location)?;
if let Err(errors) = self.add_type_members(name, body, mutable, location, scope_stack) {
return Err(errors[0].clone());
}
},
StatementKind::Declaration(Declaration::Binding { name, .. }) => {
let fq_binding = FQSN::from_scope_stack(scope_stack.as_ref(), name.as_str().to_owned());
self.bindings.register(fq_binding, location)?;
}
StatementKind::Module(ModuleSpecifier { name, contents }) => {
let mod_name = name.as_str().to_owned();
let fq_module = FQSN::from_scope_stack(scope_stack.as_ref(), mod_name.clone());
let new_scope = Scope::Name(mod_name);
self.bindings.register(fq_module, location)?;
scope_stack.push(new_scope);
let output = self.add_from_scope(contents.as_ref(), scope_stack);
scope_stack.pop();
output?
},
_ => (),
}
}
Ok(())
}
fn add_type_members(&mut self, type_name: &TypeSingletonName, type_body: &TypeBody, _mutable: &bool, location: Location, scope_stack: &mut Vec<Scope>) -> Result<(), Vec<DuplicateName>> {
let mut errors = vec![];
let mut register = |fqsn: FQSN| {
if let Err(err) = self.types.register(fqsn, location) {
errors.push(err);
}
};
let TypeBody(variants) = type_body;
let new_scope = Scope::Name(type_name.name.as_ref().to_owned());
scope_stack.push(new_scope);
for variant in variants {
match variant {
Variant::UnitStruct(name) | Variant::TupleStruct(name, _) => {
let fq_name = FQSN::from_scope_stack(scope_stack.as_ref(), name.as_ref().to_owned());
register(fq_name);
},
Variant::Record { name, members } => {
let fq_name = FQSN::from_scope_stack(scope_stack.as_ref(), name.as_ref().to_owned());
register(fq_name.clone());
for (field_name, _) in members {
let fq_field_name = fq_name.extend(field_name.as_str().to_owned());
register(fq_field_name);
}
}
}
}
scope_stack.pop();
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
pub fn add_top_level_symbols(&mut self, ast: &ast::AST) -> Result<(), String> {
let mut scope_name_stack = Vec::new();
self.add_symbols_from_scope(&ast.statements, &mut scope_name_stack)
}
fn add_symbols_from_scope<'a>(&'a mut self, statements: &Vec<Statement>, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
use self::ast::Declaration::*;
let mut seen_identifiers = DuplicateNameTrackTable::new();
let mut seen_modules = DuplicateNameTrackTable::new();
for statement in statements.iter() {
match statement {
Statement { kind: StatementKind::Declaration(decl), id, location, } => {
self.decl_locations.add_location(id, *location);
match decl {
FuncSig(ref signature) => {
seen_identifiers.try_register(&signature.name, id, &self.decl_locations)
.map_err(|line| format!("Duplicate function definition: {}. It's already defined at {}", signature.name, line))?;
self.add_function_signature(signature, scope_name_stack)?
}
FuncDecl(ref signature, ref body) => {
seen_identifiers.try_register(&signature.name, id, &self.decl_locations)
.map_err(|line| format!("Duplicate function definition: {}. It's already defined at {}", signature.name, line))?;
self.add_function_signature(signature, scope_name_stack)?;
scope_name_stack.push(ScopeSegment{
name: signature.name.clone(),
});
let output = self.add_symbols_from_scope(body, scope_name_stack);
scope_name_stack.pop();
output?
},
TypeDecl { name, body, mutable } => {
seen_identifiers.try_register(&name.name, &id, &self.decl_locations)
.map_err(|line| format!("Duplicate type definition: {}. It's already defined at {}", name.name, line))?;
self.add_type_decl(name, body, mutable, scope_name_stack)?
},
Binding { name, .. } => {
seen_identifiers.try_register(&name, &id, &self.decl_locations)
.map_err(|line| format!("Duplicate variable definition: {}. It's already defined at {}", name, line))?;
self.add_new_symbol(name, scope_name_stack, SymbolSpec::Binding);
}
_ => ()
}
},
Statement { kind: StatementKind::Module(ModuleSpecifier { name, contents}), id, location } => {
self.decl_locations.add_location(id, *location);
seen_modules.try_register(name, id, &self.decl_locations)
.map_err(|line| format!("Duplicate module definition: {}. It's already defined at {}", name, line))?;
scope_name_stack.push(ScopeSegment { name: name.clone() });
let output = self.add_symbols_from_scope(contents, scope_name_stack);
scope_name_stack.pop();
output?
},
_ => ()
}
}
Ok(())
}
#[allow(dead_code)]
pub fn debug_symbol_table(&self) -> String {
let mut output = "Symbol table\n".to_string();
let mut sorted_symbols: Vec<(&FullyQualifiedSymbolName, &Symbol)> = self.symbol_path_to_symbol.iter().collect();
sorted_symbols.sort_by(|(fqsn, _), (other_fqsn, _)| fqsn.cmp(other_fqsn));
for (name, sym) in sorted_symbols.iter() {
write!(output, "{} -> {}\n", name, sym).unwrap();
}
output
}
fn add_function_signature(&mut self, signature: &Signature, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
let mut local_type_context = LocalTypeContext::new();
let types = signature.params.iter().map(|param| match param.anno {
Some(ref type_identifier) => Rc::new(format!("{:?}", type_identifier)),
None => local_type_context.new_universal_type()
}).collect();
self.add_new_symbol(&signature.name, scope_name_stack, SymbolSpec::Func(types));
Ok(())
}
//TODO handle type mutability
fn add_type_decl(&mut self, type_name: &TypeSingletonName, body: &TypeBody, _mutable: &bool, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
use crate::ast::{TypeIdentifier};
let TypeBody(variants) = body;
let ref type_name = type_name.name;
let type_spec = SymbolSpec::Type {
name: type_name.clone(),
};
self.add_new_symbol(type_name, &scope_name_stack, type_spec);
scope_name_stack.push(ScopeSegment{
name: type_name.clone(),
});
//TODO figure out why _params isn't being used here
for (index, var) in variants.iter().enumerate() {
match var {
Variant::UnitStruct(variant_name) => {
let spec = SymbolSpec::DataConstructor {
index,
type_name: type_name.clone(),
type_args: vec![],
};
self.add_new_symbol(variant_name, scope_name_stack, spec);
},
Variant::TupleStruct(variant_name, tuple_members) => {
//TODO fix the notion of a tuple type
let type_args = tuple_members.iter().map(|type_name| match type_name {
TypeIdentifier::Singleton(TypeSingletonName { name, ..}) => name.clone(),
TypeIdentifier::Tuple(_) => unimplemented!(),
}).collect();
let spec = SymbolSpec::DataConstructor {
index,
type_name: type_name.clone(),
type_args
};
self.add_new_symbol(variant_name, scope_name_stack, spec);
},
Variant::Record { name, members: defined_members } => {
let mut members = HashMap::new();
let mut duplicate_member_definitions = Vec::new();
for (member_name, member_type) in defined_members {
match members.entry(member_name.clone()) {
Entry::Occupied(_) => duplicate_member_definitions.push(member_name.clone()),
Entry::Vacant(v) => {
v.insert(match member_type {
TypeIdentifier::Singleton(TypeSingletonName { name, ..}) => name.clone(),
TypeIdentifier::Tuple(_) => unimplemented!(),
});
}
}
}
if duplicate_member_definitions.len() != 0 {
return Err(format!("Duplicate member(s) in definition of type {}: {:?}", type_name, duplicate_member_definitions));
}
let spec = SymbolSpec::RecordConstructor { index, type_name: type_name.clone(), members };
self.add_new_symbol(name, scope_name_stack, spec);
},
}
}
scope_name_stack.pop();
Ok(())
}
}
struct LocalTypeContext {
state: u8
}
impl LocalTypeContext {
fn new() -> LocalTypeContext {
LocalTypeContext { state: 0 }
}
fn new_universal_type(&mut self) -> TypeName {
let n = self.state;
self.state += 1;
Rc::new(format!("{}", (('a' as u8) + n) as char))
}
}