Break up symbol table code into smaller modules

This commit is contained in:
Greg Shuflin 2021-11-02 20:49:38 -07:00
parent eb6a7e95a9
commit 45c72f97a2
2 changed files with 324 additions and 314 deletions

View File

@ -1,23 +1,21 @@
#![allow(clippy::enum_variant_names)] #![allow(clippy::enum_variant_names)]
use std::{ use std::{
collections::{hash_map::Entry, HashMap, HashSet}, collections::{hash_map::Entry, HashMap},
fmt, fmt,
rc::Rc, rc::Rc,
str::FromStr,
}; };
use crate::{ use crate::{
ast, ast,
ast::{ ast::ItemId,
Declaration, Expression, ExpressionKind, ItemId, Statement, StatementKind, TypeBody,
TypeSingletonName, Variant, VariantKind,
},
builtin::Builtin, builtin::Builtin,
tokenizing::Location, tokenizing::Location,
type_inference::{self, PendingType, TypeBuilder, TypeContext, TypeId, VariantBuilder}, type_inference::{TypeContext, TypeId},
}; };
mod populator;
use populator::SymbolTablePopulator;
mod fqsn; mod fqsn;
pub use fqsn::{Fqsn, ScopeSegment}; pub use fqsn::{Fqsn, ScopeSegment};
mod resolver; mod resolver;
@ -94,18 +92,16 @@ pub struct SymbolTable {
} }
impl SymbolTable { impl SymbolTable {
pub fn new() -> SymbolTable { /// Create a new, empty SymbolTable
let table = SymbolTable { pub fn new() -> Self {
Self {
def_id_store: IdStore::new(), def_id_store: IdStore::new(),
symbol_trie: SymbolTrie::new(), symbol_trie: SymbolTrie::new(),
fq_names: NameTable::new(), fq_names: NameTable::new(),
types: NameTable::new(), types: NameTable::new(),
id_to_def: HashMap::new(), id_to_def: HashMap::new(),
def_to_symbol: HashMap::new(), def_to_symbol: HashMap::new(),
}; }
table
} }
/// The main entry point into the symbol table. This will traverse the AST in several /// The main entry point into the symbol table. This will traverse the AST in several
@ -116,13 +112,18 @@ impl SymbolTable {
ast: &ast::AST, ast: &ast::AST,
type_context: &mut TypeContext, type_context: &mut TypeContext,
) -> Result<(), Vec<SymbolError>> { ) -> Result<(), Vec<SymbolError>> {
let mut runner = SymbolTableRunner { type_context, table: self }; let mut populator = SymbolTablePopulator { type_context, table: self };
let errs = runner.populate_name_tables(ast); let errs = populator.populate_name_tables(ast);
if !errs.is_empty() { if !errs.is_empty() {
return Err(errs); return Err(errs);
} }
runner.resolve_scopes(ast);
// Walks the AST, matching the ID of an identifier used in some expression to
// the corresponding Symbol.
let mut resolver = resolver::ScopeResolver::new(self);
resolver.resolve(ast);
Ok(()) Ok(())
} }
@ -168,11 +169,6 @@ impl SymbolTable {
} }
} }
struct SymbolTableRunner<'a> {
type_context: &'a mut TypeContext,
table: &'a mut SymbolTable,
}
#[allow(dead_code)] #[allow(dead_code)]
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Symbol { pub struct Symbol {
@ -239,296 +235,3 @@ impl fmt::Display for SymbolSpec {
} }
} }
} }
impl<'a> SymbolTableRunner<'a> {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
* later */
fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) {
self.table.add_symbol(id, fqsn, spec)
}
/// Walks the AST, matching the ID of an identifier used in some expression to
/// the corresponding Symbol.
fn resolve_scopes(&mut self, ast: &ast::AST) {
let mut resolver = resolver::ScopeResolver::new(self.table);
resolver.resolve(ast);
}
/// This function traverses the AST and adds symbol table entries for
/// constants, functions, types, and modules defined within. This simultaneously
/// checks for dupicate definitions (and returns errors if discovered), and sets
/// up name tables that will be used by further parts of the compiler
fn populate_name_tables(&mut self, ast: &ast::AST) -> Vec<SymbolError> {
let mut scope_stack = vec![];
self.add_from_scope(ast.statements.as_ref(), &mut scope_stack, false)
}
fn add_from_scope(
&mut self,
statements: &[Statement],
scope_stack: &mut Vec<ScopeSegment>,
function_scope: bool,
) -> Vec<SymbolError> {
let mut errors = vec![];
for statement in statements {
let Statement { id, kind, location } = statement;
let location = *location;
if let Err(err) = self.add_single_statement(id, kind, location, scope_stack, function_scope) {
errors.push(err);
} else {
// If there's an error with a name, don't recurse into subscopes of that name
let recursive_errs = match kind {
StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => {
let new_scope = ScopeSegment::Name(signature.name.clone());
scope_stack.push(new_scope);
let output = self.add_from_scope(body.as_ref(), scope_stack, true);
scope_stack.pop();
output
}
StatementKind::Declaration(Declaration::Module { name, items }) => {
let new_scope = ScopeSegment::Name(name.clone());
scope_stack.push(new_scope);
let output = self.add_from_scope(items.as_ref(), scope_stack, false);
scope_stack.pop();
output
}
StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) =>
self.add_type_members(name, body, mutable, location, scope_stack),
_ => vec![],
};
errors.extend(recursive_errs.into_iter());
}
}
errors
}
fn add_single_statement(
&mut self,
id: &ItemId,
kind: &StatementKind,
location: Location,
scope_stack: &[ScopeSegment],
function_scope: bool,
) -> Result<(), SymbolError> {
match kind {
StatementKind::Declaration(Declaration::FuncSig(signature)) => {
let fq_function = Fqsn::from_scope_stack(scope_stack, signature.name.clone());
self.table
.fq_names
.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?;
self.add_symbol(id, fq_function, SymbolSpec::Func);
}
StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => {
let fn_name = &signature.name;
let fq_function = Fqsn::from_scope_stack(scope_stack, fn_name.clone());
self.table
.fq_names
.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?;
self.add_symbol(id, fq_function, SymbolSpec::Func);
}
StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => {
let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone());
self.table.types.register(fq_type, NameSpec { location, kind: TypeKind })?;
}
StatementKind::Declaration(Declaration::Binding { name, .. }) => {
let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone());
self.table
.fq_names
.register(fq_binding.clone(), NameSpec { location, kind: NameKind::Binding })?;
if !function_scope {
self.add_symbol(id, fq_binding, SymbolSpec::GlobalBinding);
}
}
StatementKind::Declaration(Declaration::Module { name, .. }) => {
let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone());
self.table.fq_names.register(fq_module, NameSpec { location, kind: NameKind::Module })?;
}
StatementKind::Declaration(Declaration::Annotation { name, arguments, inner }) => {
let inner = inner.as_ref();
self.add_single_statement(
&inner.id,
&inner.kind,
inner.location,
scope_stack,
function_scope,
)?;
self.process_annotation(name.as_ref(), arguments.as_slice(), scope_stack, inner)?;
}
_ => (),
}
Ok(())
}
fn process_annotation(
&mut self,
name: &str,
arguments: &[Expression],
scope_stack: &[ScopeSegment],
inner: &Statement,
) -> Result<(), SymbolError> {
println!("handling annotation: {}", name);
if name == "register_builtin" {
if let Statement {
id: _,
location: _,
kind: StatementKind::Declaration(Declaration::FuncDecl(sig, _)),
} = inner
{
let fqsn = Fqsn::from_scope_stack(scope_stack, sig.name.clone());
let builtin_name = match arguments {
[Expression { kind: ExpressionKind::Value(qname), .. }]
if qname.components.len() == 1 =>
qname.components[0].clone(),
_ =>
return Err(SymbolError::BadAnnotation {
name: name.to_string(),
msg: "Bad argument for register_builtin".to_string(),
}),
};
let builtin =
Builtin::from_str(builtin_name.as_str()).map_err(|_| SymbolError::BadAnnotation {
name: name.to_string(),
msg: format!("Invalid builtin: {}", builtin_name),
})?;
self.table.populate_single_builtin(fqsn, builtin);
Ok(())
} else {
Err(SymbolError::BadAnnotation {
name: name.to_string(),
msg: "register_builtin not annotating a function".to_string(),
})
}
} else {
Err(SymbolError::UnknownAnnotation { name: name.to_string() })
}
}
fn add_type_members(
&mut self,
type_name: &TypeSingletonName,
type_body: &TypeBody,
_mutable: &bool,
location: Location,
scope_stack: &mut Vec<ScopeSegment>,
) -> Vec<SymbolError> {
let (variants, immediate_variant) = match type_body {
TypeBody::Variants(variants) => (variants.clone(), false),
TypeBody::ImmediateRecord(id, fields) => (
vec![Variant {
id: *id,
name: type_name.name.clone(),
kind: VariantKind::Record(fields.clone()),
}],
true,
),
};
let type_fqsn = Fqsn::from_scope_stack(scope_stack, type_name.name.clone());
let new_scope = ScopeSegment::Name(type_name.name.clone());
scope_stack.push(new_scope);
// Check for duplicates before registering any types with the TypeContext
let mut seen_variants = HashSet::new();
let mut errors = vec![];
for variant in variants.iter() {
if seen_variants.contains(&variant.name) {
errors.push(SymbolError::DuplicateVariant {
type_fqsn: type_fqsn.clone(),
name: variant.name.as_ref().to_string(),
})
}
seen_variants.insert(variant.name.clone());
if let VariantKind::Record(ref members) = variant.kind {
let variant_name = Fqsn::from_scope_stack(scope_stack.as_ref(), variant.name.clone());
let mut seen_members = HashMap::new();
for (member_name, _) in members.iter() {
match seen_members.entry(member_name.as_ref()) {
Entry::Occupied(o) => {
let location = *o.get();
errors.push(SymbolError::DuplicateRecord {
type_name: variant_name.clone(),
location,
member: member_name.as_ref().to_string(),
});
}
//TODO eventually this should track meaningful locations
Entry::Vacant(v) => {
v.insert(location);
}
}
}
}
}
if !errors.is_empty() {
return errors;
}
let mut type_builder = TypeBuilder::new(type_name.name.as_ref());
let mut fqsn_id_map = HashMap::new();
for variant in variants.iter() {
let Variant { name, kind, id } = variant;
fqsn_id_map.insert(Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()), id);
let mut variant_builder = VariantBuilder::new(name.as_ref());
match kind {
VariantKind::UnitStruct => (),
VariantKind::TupleStruct(items) =>
for type_identifier in items {
let pending: PendingType = type_identifier.into();
variant_builder.add_member(pending);
},
VariantKind::Record(members) =>
for (field_name, type_identifier) in members.iter() {
let pending: PendingType = type_identifier.into();
variant_builder.add_record_member(field_name.as_ref(), pending);
},
}
type_builder.add_variant(variant_builder);
}
let type_id = self.type_context.register_type(type_builder);
let type_definition = self.type_context.lookup_type(&type_id).unwrap();
// This index is guaranteed to be the correct tag
for (index, variant) in type_definition.variants.iter().enumerate() {
let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string()));
let id = fqsn_id_map.get(&fqsn).unwrap();
let tag = index as u32;
let spec = match &variant.members {
type_inference::VariantMembers::Unit => SymbolSpec::DataConstructor { tag, type_id },
type_inference::VariantMembers::Tuple(..) => SymbolSpec::DataConstructor { tag, type_id },
type_inference::VariantMembers::Record(..) => SymbolSpec::RecordConstructor { tag, type_id },
};
self.table.add_symbol(id, fqsn, spec);
}
if immediate_variant {
let variant = &type_definition.variants[0];
let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string()));
let id = fqsn_id_map.get(&fqsn).unwrap();
let abbrev_fqsn = Fqsn::from_scope_stack(
scope_stack[0..scope_stack.len() - 1].as_ref(),
Rc::new(variant.name.to_string()),
);
let spec = SymbolSpec::RecordConstructor { tag: 0, type_id };
self.table.add_symbol(id, abbrev_fqsn, spec);
}
scope_stack.pop();
vec![]
}
}

View File

@ -0,0 +1,307 @@
use std::{
collections::{hash_map::Entry, HashMap, HashSet},
rc::Rc,
str::FromStr,
};
use super::{Fqsn, NameKind, NameSpec, ScopeSegment, SymbolError, SymbolSpec, SymbolTable, TypeKind};
use crate::{
ast::{
Declaration, Expression, ExpressionKind, ItemId, Statement, StatementKind, TypeBody,
TypeSingletonName, Variant, VariantKind, AST,
},
builtin::Builtin,
tokenizing::Location,
type_inference::{self, PendingType, TypeBuilder, TypeContext, VariantBuilder},
};
pub(super) struct SymbolTablePopulator<'a> {
pub(super) type_context: &'a mut TypeContext,
pub(super) table: &'a mut SymbolTable,
}
impl<'a> SymbolTablePopulator<'a> {
/* note: this adds names for *forward reference* but doesn't actually create any types. solve that problem
* later */
fn add_symbol(&mut self, id: &ItemId, fqsn: Fqsn, spec: SymbolSpec) {
self.table.add_symbol(id, fqsn, spec)
}
/// This function traverses the AST and adds symbol table entries for
/// constants, functions, types, and modules defined within. This simultaneously
/// checks for dupicate definitions (and returns errors if discovered), and sets
/// up name tables that will be used by further parts of the compiler
pub fn populate_name_tables(&mut self, ast: &AST) -> Vec<SymbolError> {
let mut scope_stack = vec![];
self.add_from_scope(ast.statements.as_ref(), &mut scope_stack, false)
}
fn add_from_scope(
&mut self,
statements: &[Statement],
scope_stack: &mut Vec<ScopeSegment>,
function_scope: bool,
) -> Vec<SymbolError> {
let mut errors = vec![];
for statement in statements {
let Statement { id, kind, location } = statement;
let location = *location;
if let Err(err) = self.add_single_statement(id, kind, location, scope_stack, function_scope) {
errors.push(err);
} else {
// If there's an error with a name, don't recurse into subscopes of that name
let recursive_errs = match kind {
StatementKind::Declaration(Declaration::FuncDecl(signature, body)) => {
let new_scope = ScopeSegment::Name(signature.name.clone());
scope_stack.push(new_scope);
let output = self.add_from_scope(body.as_ref(), scope_stack, true);
scope_stack.pop();
output
}
StatementKind::Declaration(Declaration::Module { name, items }) => {
let new_scope = ScopeSegment::Name(name.clone());
scope_stack.push(new_scope);
let output = self.add_from_scope(items.as_ref(), scope_stack, false);
scope_stack.pop();
output
}
StatementKind::Declaration(Declaration::TypeDecl { name, body, mutable }) =>
self.add_type_members(name, body, mutable, location, scope_stack),
_ => vec![],
};
errors.extend(recursive_errs.into_iter());
}
}
errors
}
fn add_single_statement(
&mut self,
id: &ItemId,
kind: &StatementKind,
location: Location,
scope_stack: &[ScopeSegment],
function_scope: bool,
) -> Result<(), SymbolError> {
match kind {
StatementKind::Declaration(Declaration::FuncSig(signature)) => {
let fq_function = Fqsn::from_scope_stack(scope_stack, signature.name.clone());
self.table
.fq_names
.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?;
self.add_symbol(id, fq_function, SymbolSpec::Func);
}
StatementKind::Declaration(Declaration::FuncDecl(signature, ..)) => {
let fn_name = &signature.name;
let fq_function = Fqsn::from_scope_stack(scope_stack, fn_name.clone());
self.table
.fq_names
.register(fq_function.clone(), NameSpec { location, kind: NameKind::Function })?;
self.table.types.register(fq_function.clone(), NameSpec { location, kind: TypeKind })?;
self.add_symbol(id, fq_function, SymbolSpec::Func);
}
StatementKind::Declaration(Declaration::TypeDecl { name, .. }) => {
let fq_type = Fqsn::from_scope_stack(scope_stack, name.name.clone());
self.table.types.register(fq_type, NameSpec { location, kind: TypeKind })?;
}
StatementKind::Declaration(Declaration::Binding { name, .. }) => {
let fq_binding = Fqsn::from_scope_stack(scope_stack, name.clone());
self.table
.fq_names
.register(fq_binding.clone(), NameSpec { location, kind: NameKind::Binding })?;
if !function_scope {
self.add_symbol(id, fq_binding, SymbolSpec::GlobalBinding);
}
}
StatementKind::Declaration(Declaration::Module { name, .. }) => {
let fq_module = Fqsn::from_scope_stack(scope_stack, name.clone());
self.table.fq_names.register(fq_module, NameSpec { location, kind: NameKind::Module })?;
}
StatementKind::Declaration(Declaration::Annotation { name, arguments, inner }) => {
let inner = inner.as_ref();
self.add_single_statement(
&inner.id,
&inner.kind,
inner.location,
scope_stack,
function_scope,
)?;
self.process_annotation(name.as_ref(), arguments.as_slice(), scope_stack, inner)?;
}
_ => (),
}
Ok(())
}
fn process_annotation(
&mut self,
name: &str,
arguments: &[Expression],
scope_stack: &[ScopeSegment],
inner: &Statement,
) -> Result<(), SymbolError> {
println!("handling annotation: {}", name);
if name == "register_builtin" {
if let Statement {
id: _,
location: _,
kind: StatementKind::Declaration(Declaration::FuncDecl(sig, _)),
} = inner
{
let fqsn = Fqsn::from_scope_stack(scope_stack, sig.name.clone());
let builtin_name = match arguments {
[Expression { kind: ExpressionKind::Value(qname), .. }]
if qname.components.len() == 1 =>
qname.components[0].clone(),
_ =>
return Err(SymbolError::BadAnnotation {
name: name.to_string(),
msg: "Bad argument for register_builtin".to_string(),
}),
};
let builtin =
Builtin::from_str(builtin_name.as_str()).map_err(|_| SymbolError::BadAnnotation {
name: name.to_string(),
msg: format!("Invalid builtin: {}", builtin_name),
})?;
self.table.populate_single_builtin(fqsn, builtin);
Ok(())
} else {
Err(SymbolError::BadAnnotation {
name: name.to_string(),
msg: "register_builtin not annotating a function".to_string(),
})
}
} else {
Err(SymbolError::UnknownAnnotation { name: name.to_string() })
}
}
fn add_type_members(
&mut self,
type_name: &TypeSingletonName,
type_body: &TypeBody,
_mutable: &bool,
location: Location,
scope_stack: &mut Vec<ScopeSegment>,
) -> Vec<SymbolError> {
let (variants, immediate_variant) = match type_body {
TypeBody::Variants(variants) => (variants.clone(), false),
TypeBody::ImmediateRecord(id, fields) => (
vec![Variant {
id: *id,
name: type_name.name.clone(),
kind: VariantKind::Record(fields.clone()),
}],
true,
),
};
let type_fqsn = Fqsn::from_scope_stack(scope_stack, type_name.name.clone());
let new_scope = ScopeSegment::Name(type_name.name.clone());
scope_stack.push(new_scope);
// Check for duplicates before registering any types with the TypeContext
let mut seen_variants = HashSet::new();
let mut errors = vec![];
for variant in variants.iter() {
if seen_variants.contains(&variant.name) {
errors.push(SymbolError::DuplicateVariant {
type_fqsn: type_fqsn.clone(),
name: variant.name.as_ref().to_string(),
})
}
seen_variants.insert(variant.name.clone());
if let VariantKind::Record(ref members) = variant.kind {
let variant_name = Fqsn::from_scope_stack(scope_stack.as_ref(), variant.name.clone());
let mut seen_members = HashMap::new();
for (member_name, _) in members.iter() {
match seen_members.entry(member_name.as_ref()) {
Entry::Occupied(o) => {
let location = *o.get();
errors.push(SymbolError::DuplicateRecord {
type_name: variant_name.clone(),
location,
member: member_name.as_ref().to_string(),
});
}
//TODO eventually this should track meaningful locations
Entry::Vacant(v) => {
v.insert(location);
}
}
}
}
}
if !errors.is_empty() {
return errors;
}
let mut type_builder = TypeBuilder::new(type_name.name.as_ref());
let mut fqsn_id_map = HashMap::new();
for variant in variants.iter() {
let Variant { name, kind, id } = variant;
fqsn_id_map.insert(Fqsn::from_scope_stack(scope_stack.as_ref(), name.clone()), id);
let mut variant_builder = VariantBuilder::new(name.as_ref());
match kind {
VariantKind::UnitStruct => (),
VariantKind::TupleStruct(items) =>
for type_identifier in items {
let pending: PendingType = type_identifier.into();
variant_builder.add_member(pending);
},
VariantKind::Record(members) =>
for (field_name, type_identifier) in members.iter() {
let pending: PendingType = type_identifier.into();
variant_builder.add_record_member(field_name.as_ref(), pending);
},
}
type_builder.add_variant(variant_builder);
}
let type_id = self.type_context.register_type(type_builder);
let type_definition = self.type_context.lookup_type(&type_id).unwrap();
// This index is guaranteed to be the correct tag
for (index, variant) in type_definition.variants.iter().enumerate() {
let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string()));
let id = fqsn_id_map.get(&fqsn).unwrap();
let tag = index as u32;
let spec = match &variant.members {
type_inference::VariantMembers::Unit => SymbolSpec::DataConstructor { tag, type_id },
type_inference::VariantMembers::Tuple(..) => SymbolSpec::DataConstructor { tag, type_id },
type_inference::VariantMembers::Record(..) => SymbolSpec::RecordConstructor { tag, type_id },
};
self.table.add_symbol(id, fqsn, spec);
}
if immediate_variant {
let variant = &type_definition.variants[0];
let fqsn = Fqsn::from_scope_stack(scope_stack.as_ref(), Rc::new(variant.name.to_string()));
let id = fqsn_id_map.get(&fqsn).unwrap();
let abbrev_fqsn = Fqsn::from_scope_stack(
scope_stack[0..scope_stack.len() - 1].as_ref(),
Rc::new(variant.name.to_string()),
);
let spec = SymbolSpec::RecordConstructor { tag: 0, type_id };
self.table.add_symbol(id, abbrev_fqsn, spec);
}
scope_stack.pop();
vec![]
}
}