schala/schala-lang/language/src/symbol_table/mod.rs

525 lines
18 KiB
Rust
Raw Normal View History

use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::rc::Rc;
use std::fmt;
use std::fmt::Write;
use crate::tokenizing::{Location, LineNumber};
2019-01-07 13:00:37 -08:00
use crate::ast;
use crate::ast::{ItemId, TypeBody, Variant, TypeSingletonName, Signature, Declaration, Statement, StatementKind, ModuleSpecifier};
2019-01-07 13:00:37 -08:00
use crate::typechecking::TypeName;
2019-10-17 03:15:39 -07:00
2019-10-16 19:51:43 -07:00
/// Builds a `FullyQualifiedSymbolName` from a comma-separated list of
/// `name ; kind` pairs.
///
/// Every `name` expression is converted with `to_string()` and wrapped in a
/// `ScopeSegment`, in the order written. The `kind` token is accepted so that
/// existing call sites keep compiling, but it is not used by the expansion.
// Allowed because not every build configuration invokes this macro.
#[allow(unused_macros)]
macro_rules! fqsn {
    ( $( $name:expr ; $kind:tt ),* ) => {
        // Both types are named through their crate path so the expansion does
        // not depend on what the call site happens to have imported.
        crate::symbol_table::FullyQualifiedSymbolName(vec![
            $(
                crate::symbol_table::ScopeSegment::new(std::rc::Rc::new($name.to_string()))
            ),*
        ])
    };
}
2021-10-14 06:52:50 -07:00
mod tables;
2021-10-14 06:55:57 -07:00
use tables::DeclLocations;
2019-10-17 03:15:39 -07:00
mod symbol_trie;
use symbol_trie::SymbolTrie;
2019-10-16 19:51:43 -07:00
mod test;
/// Fully-qualified symbol name: an ordered path of scopes ending in the
/// symbol's own name.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
struct FQSN {
    scopes: Vec<Scope>,
}

impl FQSN {
    /// Creates the name made of every scope in `scopes` (cloned, in order)
    /// followed by a final `Scope::Name(new_name)` segment.
    fn from_scope_stack(scopes: &[Scope], new_name: String) -> Self {
        let path = scopes
            .iter()
            .cloned()
            .chain(std::iter::once(Scope::Name(new_name)))
            .collect();
        FQSN { scopes: path }
    }

    /// Returns a new name that is `self` with `new_name` appended as one more
    /// segment; `self` is not modified.
    fn extend(&self, new_name: String) -> Self {
        Self::from_scope_stack(&self.scopes, new_name)
    }
}

//TODO eventually this should use ItemId's to avoid String-cloning
/// One segment within a scope.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
enum Scope {
    /// The outermost scope.
    Top,
    /// A scope (or final symbol) identified by name.
    Name(String),
}
#[derive(Debug, Clone)]
struct DuplicateName {
prev_name: FQSN,
location: Location
}
//TODO should map to a Spec* type that has Location and Kind of namespace entry
//tht way I don't need as many tables
/// Keeps track of what names were used in a given namespace.
struct NameTable {
table: HashMap<FQSN, Location>
}
impl NameTable {
fn new() -> Self {
Self { table: HashMap::new() }
}
fn register(&mut self, name: FQSN, location: Location) -> Result<(), DuplicateName> {
match self.table.entry(name.clone()) {
Entry::Occupied(o) => {
Err(DuplicateName { prev_name: name, location: *o.get() })
},
Entry::Vacant(v) => {
v.insert(location);
Ok(())
}
}
}
}
/// Keeps track of what names were used in a given namespace. Call try_register to add a name to
/// the table, or report an error if a name already exists.
struct DuplicateNameTrackTable {
table: HashMap<Rc<String>, LineNumber>,
}
impl DuplicateNameTrackTable {
fn new() -> DuplicateNameTrackTable {
DuplicateNameTrackTable { table: HashMap::new() }
}
2021-10-14 06:55:57 -07:00
fn try_register(&mut self, name: &Rc<String>, id: &ItemId, decl_locations: &DeclLocations) -> Result<(), LineNumber> {
match self.table.entry(name.clone()) {
Entry::Occupied(o) => {
let line_number = o.get();
Err(*line_number)
},
Entry::Vacant(v) => {
2021-10-14 06:55:57 -07:00
let line_number = if let Some(loc) = decl_locations.lookup(id) {
loc.line_num
} else {
0
};
v.insert(line_number);
Ok(())
}
}
}
}
2019-09-25 02:43:07 -07:00
/// A symbol's full path: the sequence of enclosing scope segments followed by
/// the symbol's own segment.
#[derive(PartialEq, Eq, Hash, Debug, Clone, PartialOrd, Ord)]
pub struct FullyQualifiedSymbolName(pub Vec<ScopeSegment>);

impl fmt::Display for FullyQualifiedSymbolName {
    /// Writes every segment prefixed by `::`, e.g. `::outer::inner`.
    /// An empty path produces an empty string.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0
            .iter()
            .try_for_each(|segment| write!(f, "::{}", segment))
    }
}

/// A single named component of a `FullyQualifiedSymbolName`.
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ScopeSegment {
    pub name: Rc<String>, //TODO maybe this could be a &str, for efficiency?
}

impl fmt::Display for ScopeSegment {
    /// Writes the segment's name (the kind suffix is currently always empty).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let kind = ""; //TODO implement some kind of kind-tracking here
        write!(f, "{}{}", self.name, kind)
    }
}

impl ScopeSegment {
    /// Wraps `name` in a segment.
    // Allowed because the constructor is only reached through the `fqsn!` macro.
    #[allow(dead_code)]
    pub fn new(name: Rc<String>) -> ScopeSegment {
        ScopeSegment { name }
    }
}
//cf. p. 150 or so of Language Implementation Patterns
pub struct SymbolTable {
2021-10-14 06:55:57 -07:00
decl_locations: DeclLocations,
symbol_path_to_symbol: HashMap<FullyQualifiedSymbolName, Symbol>,
2019-09-19 02:58:52 -07:00
id_to_fqsn: HashMap<ItemId, FullyQualifiedSymbolName>,
2019-10-17 03:15:39 -07:00
symbol_trie: SymbolTrie,
functions: NameTable, //TODO maybe bindings and functions should be the same table?
types: NameTable,
bindings: NameTable, //TODO NameTable should be a trie to facilitate quick lookups
}
impl SymbolTable {
pub fn new() -> SymbolTable {
2019-03-07 23:51:31 -08:00
SymbolTable {
2021-10-14 06:55:57 -07:00
decl_locations: DeclLocations::new(),
symbol_path_to_symbol: HashMap::new(),
2019-09-19 02:58:52 -07:00
id_to_fqsn: HashMap::new(),
symbol_trie: SymbolTrie::new(),
functions: NameTable::new(),
types: NameTable::new(),
bindings: NameTable::new(),
2019-03-07 23:51:31 -08:00
}
}
2018-08-05 18:19:48 -07:00
2019-09-19 02:58:52 -07:00
pub fn map_id_to_fqsn(&mut self, id: &ItemId, fqsn: FullyQualifiedSymbolName) {
self.id_to_fqsn.insert(id.clone(), fqsn);
}
pub fn get_fqsn_from_id(&self, id: &ItemId) -> Option<FullyQualifiedSymbolName> {
self.id_to_fqsn.get(&id).cloned()
}
2019-09-25 02:18:36 -07:00
fn add_new_symbol(&mut self, local_name: &Rc<String>, scope_path: &Vec<ScopeSegment>, spec: SymbolSpec) {
let mut vec: Vec<ScopeSegment> = scope_path.clone();
vec.push(ScopeSegment { name: local_name.clone() });
let fully_qualified_name = FullyQualifiedSymbolName(vec);
2019-09-25 02:18:36 -07:00
let symbol = Symbol { local_name: local_name.clone(), fully_qualified_name: fully_qualified_name.clone(), spec };
2019-10-18 09:54:56 -07:00
self.symbol_trie.insert(&fully_qualified_name);
self.symbol_path_to_symbol.insert(fully_qualified_name, symbol);
}
pub fn lookup_by_fqsn(&self, fully_qualified_path: &FullyQualifiedSymbolName) -> Option<&Symbol> {
self.symbol_path_to_symbol.get(fully_qualified_path)
2019-03-10 16:04:20 -07:00
}
2019-10-18 09:54:56 -07:00
pub fn lookup_children_of_fqsn(&self, path: &FullyQualifiedSymbolName) -> Vec<FullyQualifiedSymbolName> {
self.symbol_trie.get_children(path)
}
}
2021-10-13 00:53:32 -07:00
#[allow(dead_code)]
#[derive(Debug)]
pub struct Symbol {
2021-10-14 06:52:50 -07:00
pub local_name: Rc<String>,
fully_qualified_name: FullyQualifiedSymbolName,
pub spec: SymbolSpec,
}
2018-06-03 23:04:07 -07:00
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "<Local name: {}, Spec: {}>", self.local_name, self.spec)
2018-06-03 23:04:07 -07:00
}
}
#[derive(Debug)]
pub enum SymbolSpec {
2018-06-03 02:39:49 -07:00
Func(Vec<TypeName>),
2018-05-30 23:54:24 -07:00
DataConstructor {
index: usize,
type_name: TypeName,
2018-05-30 23:54:24 -07:00
type_args: Vec<Rc<String>>,
},
2019-01-25 00:57:01 -08:00
RecordConstructor {
index: usize,
members: HashMap<Rc<String>, TypeName>,
type_name: TypeName,
},
2019-10-16 20:22:40 -07:00
Binding,
Type {
name: TypeName
},
}
2018-06-03 23:04:07 -07:00
impl fmt::Display for SymbolSpec {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use self::SymbolSpec::*;
match self {
Func(type_names) => write!(f, "Func({:?})", type_names),
DataConstructor { index, type_name, type_args } => write!(f, "DataConstructor(idx: {})({:?} -> {})", index, type_args, type_name),
RecordConstructor { type_name, index, ..} => write!(f, "RecordConstructor(idx: {})(<members> -> {})", index, type_name),
Binding => write!(f, "Binding"),
2019-10-16 20:22:40 -07:00
Type { name } => write!(f, "Type <{}>", name),
2018-06-03 23:04:07 -07:00
}
}
}
impl SymbolTable {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
 * later */
/// Walks the AST starting from the top scope and registers the functions,
/// types, bindings and modules it declares in this table's name tables.
///
/// Registration stops at the first duplicate definition, which is reported as
/// the `Debug` rendering of the duplicate-name error.
pub fn process_ast(&mut self, ast: &ast::AST) -> Result<(), String> {
    let mut scope_stack = vec![Scope::Top];
    match self.add_from_scope(ast.statements.as_ref(), &mut scope_stack) {
        Ok(()) => Ok(()),
        Err(duplicate) => Err(format!("{:?}", duplicate)),
    }
}
//TODO this should probably return a vector of duplicate name errors
/// Registers every declaration found in `statements` under the scope path
/// currently held in `scope_stack`, recursing into function bodies and module
/// contents.
///
/// Function names are registered in both the function and the type table,
/// type names (and their members) in the type table, and bindings and module
/// names in the binding table. The first duplicate encountered is returned;
/// `scope_stack` is restored to its incoming contents after each nested scope
/// has been processed.
fn add_from_scope<'a>(&'a mut self, statements: &[Statement], scope_stack: &mut Vec<Scope>) -> Result<(), DuplicateName> {
    for Statement { kind, location, .. } in statements {
        let location = *location;
        match kind {
            StatementKind::Declaration(declaration) => match declaration {
                Declaration::FuncSig(signature) => {
                    let fq_function = FQSN::from_scope_stack(
                        scope_stack.as_slice(),
                        signature.name.as_str().to_owned(),
                    );
                    self.functions.register(fq_function.clone(), location)?;
                    self.types.register(fq_function, location)?;
                }
                Declaration::FuncDecl(signature, body) => {
                    let fn_name: String = signature.name.as_str().to_owned();
                    let fq_function = FQSN::from_scope_stack(scope_stack.as_slice(), fn_name.clone());
                    self.functions.register(fq_function.clone(), location)?;
                    self.types.register(fq_function, location)?;

                    // The function body is its own scope, named after the function.
                    scope_stack.push(Scope::Name(fn_name));
                    let nested = self.add_from_scope(body.as_ref(), scope_stack);
                    scope_stack.pop();
                    nested?;
                }
                Declaration::TypeDecl { name, body, mutable } => {
                    let fq_type = FQSN::from_scope_stack(
                        scope_stack.as_slice(),
                        name.name.as_ref().to_owned(),
                    );
                    self.types.register(fq_type, location)?;
                    // Only the first member error is reported for now.
                    if let Err(errors) = self.add_type_members(name, body, mutable, location, scope_stack) {
                        return Err(errors[0].clone());
                    }
                }
                Declaration::Binding { name, .. } => {
                    let fq_binding = FQSN::from_scope_stack(
                        scope_stack.as_slice(),
                        name.as_str().to_owned(),
                    );
                    self.bindings.register(fq_binding, location)?;
                }
                _ => (),
            },
            StatementKind::Module(ModuleSpecifier { name, contents }) => {
                let mod_name = name.as_str().to_owned();
                let fq_module = FQSN::from_scope_stack(scope_stack.as_slice(), mod_name.clone());
                self.bindings.register(fq_module, location)?;

                scope_stack.push(Scope::Name(mod_name));
                let nested = self.add_from_scope(contents.as_ref(), scope_stack);
                scope_stack.pop();
                nested?;
            }
            _ => (),
        }
    }
    Ok(())
}
/// Registers the variants of a type declaration (and the fields of record
/// variants) in the type table, scoped under the type's own name.
///
/// Every name is attempted even after a failure; all duplicate-name errors
/// are returned together. `scope_stack` has the same contents on return as on
/// entry. All names are recorded at `location`.
fn add_type_members(&mut self, type_name: &TypeSingletonName, type_body: &TypeBody, _mutable: &bool, location: Location, scope_stack: &mut Vec<Scope>) -> Result<(), Vec<DuplicateName>> {
    let TypeBody(variants) = type_body;

    // First work out every name to register, in declaration order, while the
    // type's scope is on the stack.
    scope_stack.push(Scope::Name(type_name.name.as_ref().to_owned()));
    let mut member_names: Vec<FQSN> = Vec::new();
    for variant in variants {
        match variant {
            Variant::UnitStruct(name) | Variant::TupleStruct(name, _) => {
                member_names.push(FQSN::from_scope_stack(scope_stack.as_slice(), name.as_ref().to_owned()));
            }
            Variant::Record { name, members } => {
                let record_name = FQSN::from_scope_stack(scope_stack.as_slice(), name.as_ref().to_owned());
                member_names.push(record_name.clone());
                // Record fields live one level below the record's own name.
                for (field_name, _) in members {
                    member_names.push(record_name.extend(field_name.as_str().to_owned()));
                }
            }
        }
    }
    scope_stack.pop();

    // Then register them, keeping every failure.
    let errors: Vec<DuplicateName> = member_names
        .into_iter()
        .filter_map(|fqsn| self.types.register(fqsn, location).err())
        .collect();

    if errors.is_empty() {
        Ok(())
    } else {
        Err(errors)
    }
}
2018-06-04 19:25:40 -07:00
/// Adds symbols for every declaration in `ast`, starting from an empty scope
/// path.
///
/// # Errors
///
/// Returns the message of the first duplicate definition found.
pub fn add_top_level_symbols(&mut self, ast: &ast::AST) -> Result<(), String> {
    let mut scope_name_stack = Vec::new();
    self.add_symbols_from_scope(&ast.statements, &mut scope_name_stack)
}
fn add_symbols_from_scope<'a>(&'a mut self, statements: &Vec<Statement>, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
2018-06-04 19:25:40 -07:00
use self::ast::Declaration::*;
let mut seen_identifiers = DuplicateNameTrackTable::new();
let mut seen_modules = DuplicateNameTrackTable::new();
for statement in statements.iter() {
match statement {
Statement { kind: StatementKind::Declaration(decl), id, location, } => {
2021-10-14 06:55:57 -07:00
self.decl_locations.add_location(id, *location);
match decl {
FuncSig(ref signature) => {
2021-10-14 06:55:57 -07:00
seen_identifiers.try_register(&signature.name, id, &self.decl_locations)
.map_err(|line| format!("Duplicate function definition: {}. It's already defined at {}", signature.name, line))?;
self.add_function_signature(signature, scope_name_stack)?
}
FuncDecl(ref signature, ref body) => {
2021-10-14 06:55:57 -07:00
seen_identifiers.try_register(&signature.name, id, &self.decl_locations)
.map_err(|line| format!("Duplicate function definition: {}. It's already defined at {}", signature.name, line))?;
self.add_function_signature(signature, scope_name_stack)?;
scope_name_stack.push(ScopeSegment{
name: signature.name.clone(),
});
let output = self.add_symbols_from_scope(body, scope_name_stack);
2019-10-24 02:13:07 -07:00
scope_name_stack.pop();
output?
},
TypeDecl { name, body, mutable } => {
2021-10-14 06:55:57 -07:00
seen_identifiers.try_register(&name.name, &id, &self.decl_locations)
.map_err(|line| format!("Duplicate type definition: {}. It's already defined at {}", name.name, line))?;
self.add_type_decl(name, body, mutable, scope_name_stack)?
},
Binding { name, .. } => {
2021-10-14 06:55:57 -07:00
seen_identifiers.try_register(&name, &id, &self.decl_locations)
.map_err(|line| format!("Duplicate variable definition: {}. It's already defined at {}", name, line))?;
self.add_new_symbol(name, scope_name_stack, SymbolSpec::Binding);
}
_ => ()
}
},
Statement { kind: StatementKind::Module(ModuleSpecifier { name, contents}), id, location } => {
2021-10-14 06:55:57 -07:00
self.decl_locations.add_location(id, *location);
seen_modules.try_register(name, id, &self.decl_locations)
2019-10-24 02:13:07 -07:00
.map_err(|line| format!("Duplicate module definition: {}. It's already defined at {}", name, line))?;
scope_name_stack.push(ScopeSegment { name: name.clone() });
let output = self.add_symbols_from_scope(contents, scope_name_stack);
scope_name_stack.pop();
output?
},
_ => ()
}
}
Ok(())
}
#[allow(dead_code)]
pub fn debug_symbol_table(&self) -> String {
2021-10-14 06:52:50 -07:00
let mut output = "Symbol table\n".to_string();
2019-09-25 02:43:07 -07:00
let mut sorted_symbols: Vec<(&FullyQualifiedSymbolName, &Symbol)> = self.symbol_path_to_symbol.iter().collect();
sorted_symbols.sort_by(|(fqsn, _), (other_fqsn, _)| fqsn.cmp(other_fqsn));
for (name, sym) in sorted_symbols.iter() {
write!(output, "{} -> {}\n", name, sym).unwrap();
}
output
}
2019-01-20 00:22:35 -08:00
fn add_function_signature(&mut self, signature: &Signature, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
2019-01-20 00:22:35 -08:00
let mut local_type_context = LocalTypeContext::new();
2019-06-16 14:56:52 -07:00
let types = signature.params.iter().map(|param| match param.anno {
Some(ref type_identifier) => Rc::new(format!("{:?}", type_identifier)),
None => local_type_context.new_universal_type()
2019-01-20 00:22:35 -08:00
}).collect();
self.add_new_symbol(&signature.name, scope_name_stack, SymbolSpec::Func(types));
2019-01-20 22:13:05 -08:00
Ok(())
}
2019-08-12 11:27:16 -07:00
//TODO handle type mutability
fn add_type_decl(&mut self, type_name: &TypeSingletonName, body: &TypeBody, _mutable: &bool, scope_name_stack: &mut Vec<ScopeSegment>) -> Result<(), String> {
use crate::ast::{TypeIdentifier};
2019-01-20 22:13:05 -08:00
let TypeBody(variants) = body;
let ref type_name = type_name.name;
2019-10-16 20:22:40 -07:00
let type_spec = SymbolSpec::Type {
name: type_name.clone(),
};
self.add_new_symbol(type_name, &scope_name_stack, type_spec);
2019-09-10 03:31:23 -07:00
scope_name_stack.push(ScopeSegment{
name: type_name.clone(),
});
2019-01-20 22:13:05 -08:00
//TODO figure out why _params isn't being used here
for (index, var) in variants.iter().enumerate() {
match var {
Variant::UnitStruct(variant_name) => {
let spec = SymbolSpec::DataConstructor {
index,
type_name: type_name.clone(),
2019-01-20 22:13:05 -08:00
type_args: vec![],
};
self.add_new_symbol(variant_name, scope_name_stack, spec);
2019-01-20 22:13:05 -08:00
},
Variant::TupleStruct(variant_name, tuple_members) => {
//TODO fix the notion of a tuple type
2019-01-20 22:13:05 -08:00
let type_args = tuple_members.iter().map(|type_name| match type_name {
TypeIdentifier::Singleton(TypeSingletonName { name, ..}) => name.clone(),
TypeIdentifier::Tuple(_) => unimplemented!(),
}).collect();
let spec = SymbolSpec::DataConstructor {
index,
type_name: type_name.clone(),
2019-01-20 22:13:05 -08:00
type_args
};
self.add_new_symbol(variant_name, scope_name_stack, spec);
2019-01-20 22:13:05 -08:00
},
Variant::Record { name, members: defined_members } => {
let mut members = HashMap::new();
let mut duplicate_member_definitions = Vec::new();
for (member_name, member_type) in defined_members {
match members.entry(member_name.clone()) {
Entry::Occupied(_) => duplicate_member_definitions.push(member_name.clone()),
Entry::Vacant(v) => {
v.insert(match member_type {
TypeIdentifier::Singleton(TypeSingletonName { name, ..}) => name.clone(),
TypeIdentifier::Tuple(_) => unimplemented!(),
});
}
}
}
if duplicate_member_definitions.len() != 0 {
return Err(format!("Duplicate member(s) in definition of type {}: {:?}", type_name, duplicate_member_definitions));
}
let spec = SymbolSpec::RecordConstructor { index, type_name: type_name.clone(), members };
self.add_new_symbol(name, scope_name_stack, spec);
2019-01-25 00:57:01 -08:00
},
2019-01-20 22:13:05 -08:00
}
}
2019-09-10 03:31:23 -07:00
scope_name_stack.pop();
2019-01-20 22:13:05 -08:00
Ok(())
2019-01-20 00:22:35 -08:00
}
}
/// Hands out fresh placeholder type names: `a`, `b`, ..., `z`, then `a1`,
/// `b1`, ..., `z1`, `a2`, and so on.
struct LocalTypeContext {
    /// Number of names handed out so far.
    state: usize,
}

impl LocalTypeContext {
    /// Creates a context whose first generated name is `a`.
    fn new() -> LocalTypeContext {
        LocalTypeContext { state: 0 }
    }

    /// Returns the next unused placeholder type name.
    ///
    /// The first 26 names are the single letters `a` through `z`. Later names
    /// reuse the letters with a numeric suffix, so every name stays
    /// alphanumeric and unique however many are requested.
    fn new_universal_type(&mut self) -> TypeName {
        const ALPHABET: &[u8; 26] = b"abcdefghijklmnopqrstuvwxyz";

        let n = self.state;
        self.state += 1;

        // The modulo keeps the index in range of ALPHABET.
        let letter = char::from(ALPHABET[n % ALPHABET.len()]);
        let round = n / ALPHABET.len();
        let name = if round == 0 {
            letter.to_string()
        } else {
            format!("{}{}", letter, round)
        };
        Rc::new(name)
    }
}
2019-03-07 23:51:31 -08:00