schala/HindleyMilner.hs

921 lines
32 KiB
Haskell
Raw Normal View History

{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
-- | This module is an extensively documented walkthrough for typechecking a
-- basic functional language using the Hindley-Damas-Milner algorithm.
--
-- In the end, we'll be able to infer the type of expressions like
--
-- @
-- find (λx. (>) x 0)
-- :: [Integer] -> Either () Integer
-- @
--
-- It can be used in multiple different forms:
--
-- * The source is written in literate programming style, so you can almost
-- read it from top to bottom, minus some few references to later topics.
-- * /Loads/ of doctests (runnable and verified code examples) are included
-- * The code is runnable in GHCi, all definitions are exposed.
-- * A small main module that gives many examples of what you might try out in
-- GHCi is also included.
-- * The Haddock output yields a nice overview over the definitions given, with
-- a nice rendering of a truckload of Haddock comments.
module HindleyMilner where
import Control.Monad.Trans
import Control.Monad.Trans.Except
import Control.Monad.Trans.State
import Data.Map (Map)
import qualified Data.Map as M
import Data.Monoid
import Data.Set (Set)
import qualified Data.Set as S
import Data.String
import Data.Text (Text)
import qualified Data.Text as T
-- $setup
--
-- For running doctests:
--
-- >>> :set -XOverloadedStrings
-- >>> :set -XOverloadedLists
-- >>> :set -XLambdaCase
-- >>> import qualified Data.Text.IO as T
-- >>> let putPprLn = T.putStrLn . ppr
-- #############################################################################
-- #############################################################################
-- * Preliminaries
-- #############################################################################
-- #############################################################################
-- #############################################################################
-- ** Prettyprinting
-- #############################################################################
-- | A prettyprinter class. Similar to 'Show', but with a focus on having
-- human-readable output as opposed to being valid Haskell.
class Pretty a where
ppr :: a -> Text
-- #############################################################################
-- ** Names
-- #############################################################################
-- | A 'name' is an identifier in the language we're going to typecheck.
-- Variables on both the term and type level have 'Name's, for example.
newtype Name = Name Text
deriving (Eq, Ord, Show)
-- | >>> "lorem" :: Name
-- Name "lorem"
instance IsString Name where
fromString = Name . T.pack
-- | >>> putPprLn (Name "var")
-- var
instance Pretty Name where
ppr (Name n) = n
-- #############################################################################
-- ** Monotypes
-- #############################################################################
-- | A monotype is an unquantified/unparametric type, in other words it contains
-- no @forall@s. Monotypes are the inner building blocks of all types. Examples
-- of monotypes are @Int@, @a@, @a -> b@.
--
-- In formal notation, 'MType's are often called τ (tau) types.
data MType = TVar Name -- ^ @a@
| TFun MType MType -- ^ @a -> b@
| TConst Name -- ^ @Int@, @()@, …
-- Since we can't declare our own types in our simple type system
-- here, we'll hard-code certain basic ones so we can typecheck some
-- familar functions that use them later.
| TList MType -- ^ @[a]@
| TEither MType MType -- ^ @Either a b@
| TTuple MType MType -- ^ @(a,b)@
deriving Show
-- | >>> putPprLn (TFun (TEither (TVar "a") (TVar "b")) (TFun (TVar "c") (TVar "d")))
-- Either a b → c → d
--
-- Using the 'IsString' instance:
--
-- >>> putPprLn (TFun (TEither "a" "b") (TFun "c" "d"))
-- Either a b → c → d
instance Pretty MType where
ppr = go False
where
go _ (TVar name) = ppr name
go _ (TList a) = "[" <> ppr a <> "]"
go _ (TEither l r) = "Either " <> ppr l <> " " <> ppr r
go _ (TTuple a b) = "(" <> ppr a <> ", " <> ppr b <> ")"
go _ (TConst name) = ppr name
go parenthesize (TFun a b)
| parenthesize = "(" <> lhs <> "" <> rhs <> ")"
| otherwise = lhs <> "" <> rhs
where lhs = go True a
rhs = go False b
-- | >>> "var" :: MType
-- TVar (Name "var")
instance IsString MType where
fromString = TVar . fromString
-- | The free variables of an 'MType'. This is simply the collection of all the
-- individual type variables occurring inside of it.
--
-- __Example:__ The free variables of @a -> b@ are @a@ and @b@.
freeMType :: MType -> Set Name
freeMType = \case
TVar a -> [a]
TFun a b -> freeMType a <> freeMType b
TList a -> freeMType a
TEither l r -> freeMType l <> freeMType r
TTuple a b -> freeMType a <> freeMType b
TConst _ -> []
-- | Substitute all the contained type variables mentioned in the substitution,
-- and leave everything else alone.
instance Substitutable MType where
applySubst s = \case
TVar a -> let Subst s' = s
in M.findWithDefault (TVar a) a s'
TFun f x -> TFun (applySubst s f) (applySubst s x)
TList a -> TList (applySubst s a)
TEither l r -> TEither (applySubst s l) (applySubst s r)
TTuple a b -> TTuple (applySubst s a) (applySubst s b)
c@TConst {} -> c
-- #############################################################################
-- ** Polytypes
-- #############################################################################
-- | A polytype is a monotype universally quantified over a number of type
-- variables. In Haskell, all definitions have polytypes, but since the @forall@
-- is implicit they look a bit like monotypes, maybe confusingly so. For
-- example, the type of @1 :: Int@ is actually @forall <nothing>. Int@, and the
-- type of @id@ is @forall a. a -> a@, although GHC displays it as @a -> a@.
--
-- A polytype claims to work "for all imaginable type parameters", very similar
-- to how a lambda claims to work "for all imaginable value parameters". We can
-- insert a value into a lambda's parameter to evaluate it to a new value, and
-- similarly we'll later insert types into a polytype's quantified variables to
-- gain new types.
--
-- __Example:__ in a definition @id :: forall a. a -> a@, the @a@ after the
-- ∀ ("forall") is the collection of type variables, and @a -> a@ is the 'MType'
-- quantified over. When we have such an @id@, we also have its specialized
-- version @Int -> Int@ available. This process will be the topic of the type
-- inference/unification algorithms.
--
-- In formal notation, 'PType's are often called σ (sigma) types.
--
-- The purpose of having monotypes and polytypes is that we'd like to only have
-- universal quantification at the top level, restricting our language to rank-1
-- polymorphism, where type inferece is total (all types can be inferred) and
-- simple (only a handful of typing rules). Weakening this constraint would be
-- easy: if we allowed universal quantification within function types we would
-- get rank-N polymorphism. Taking it even further to allow it anywhere,
-- effectively replacing all occurrences of 'MType' with 'PType', yields
-- impredicative types. Both these extensions make the type system
-- *significantly* more complex though.
data PType = Forall (Set Name) MType -- ^ ∀{α}. τ
-- | >>> putPprLn (Forall ["a"] (TFun "a" "a"))
-- ∀a. a → a
instance Pretty PType where
ppr (Forall qs mType) = "" <> pprUniversals <> ". " <> ppr mType
where
pprUniversals
| S.null qs = ""
| otherwise = (T.intercalate " " . map ppr . S.toList) qs
-- | The free variables of a 'PType' are the free variables of the contained
-- 'MType', except those universally quantified.
--
-- >>> let sigma = Forall ["a"] (TFun "a" (TFun (TTuple "b" "a") "c"))
-- >>> putPprLn sigma
-- ∀a. a → (b, a) → c
-- >>> let display = T.putStrLn . T.intercalate ", " . foldMap (\x -> [ppr x])
-- >>> display (freePType sigma)
-- b, c
freePType :: PType -> Set Name
freePType (Forall qs mType) = freeMType mType `S.difference` qs
-- | Substitute all the free type variables.
instance Substitutable PType where
applySubst (Subst subst) (Forall qs mType) =
let qs' = M.fromSet (const ()) qs
subst' = Subst (subst `M.difference` qs')
in Forall qs (applySubst subst' mType)
-- #############################################################################
-- ** The environment
-- #############################################################################
-- | The environment consists of all the values available in scope, and their
-- associated polytypes. Other common names for it include "(typing) context",
-- and because of the commonly used symbol for it sometimes directly
-- \"Gamma"/@"Γ"@.
--
-- There are two kinds of membership in an environment,
--
-- - @∈@: an environment @Γ@ can be viewed as a set of @(value, type)@ pairs,
-- and we can test whether something is /literally contained/ by it via
-- x:σ ∈ Γ
-- - @⊢@, pronounced /entails/, describes all the things that are well-typed,
-- given an environment @Γ@. @Γ ⊢ x:τ@ can thus be seen as a judgement that
-- @x:τ@ is /figuratively contained/ in @Γ@.
--
-- For example, the environment @{x:Int}@ literally contains @x@, but given
-- this, it also entails @λy. x@, @λy z. x@, @let id = λy. y in id x@ and so on.
--
-- In Haskell terms, the environment consists of all the things you currently
-- have available, or that can be built by comining them. If you import the
-- Prelude, your environment entails
--
-- @
-- id → ∀a. a→a
-- map → ∀a b. (a→b) → [a] → [b]
-- putStrLn → ∀∅. String → IO ()
-- …
-- id map → ∀a b. (a→b) → [a] → [b]
-- map putStrLn → ∀∅. [String] -> [IO ()]
-- …
-- @
newtype Env = Env (Map Name PType)
-- | >>> :{
-- putPprLn (Env
-- [ ("id", Forall ["a"] (TFun "a" "a"))
-- , ("const", Forall ["a", "b"] (TFun "a" (TFun "b" "a"))) ])
-- :}
-- Γ = { const : ∀a b. a → b → a
-- , id : ∀a. a → a }
instance Pretty Env where
ppr (Env env) = "Γ = { " <> T.intercalate "\n , " pprBindings <> " }"
where
bindings = M.assocs env
pprBinding (name, pType) = ppr name <> " : " <> ppr pType
pprBindings = map pprBinding bindings
-- | The free variables of an 'Env'ironment are all the free variables of the
-- 'PType's it contains.
freeEnv :: Env -> Set Name
freeEnv (Env env) = let allPTypes = M.elems env
in S.unions (map freePType allPTypes)
-- | Performing a 'Subst'itution in an 'Env'ironment means performing that
-- substituion on all the contained 'PType's.
instance Substitutable Env where
applySubst s (Env env) = Env (M.map (applySubst s) env)
-- #############################################################################
-- ** Substitutions
-- #############################################################################
-- | A substitution is a mapping from type variables to 'MType's. Applying a
-- substitution means applying those replacements. For example, the substitution
-- @a -> Int@ applied to @a -> a@ yields the result @Int -> Int@.
--
-- A key concept behind Hindley-Milner is that once we dive deeper into an
-- expression, we learn more about our type variables. We might learn that @a@
-- has to be specialized to @b -> b@, and then later on that @b@ is actually
-- @Int@. Substitutions are an organized way of carrying this information along.
newtype Subst = Subst (Map Name MType)
-- | We're going to apply substitutions to a variety of other values that
-- somehow contain type variables, so we overload this application operation in
-- a class here.
--
-- Laws:
--
-- @
-- 'applySubst' 'mempty' ≡ 'id'
-- 'applySubst' (s1 '<>' s2) ≡ 'applySubst' s1 . 'applySubst' s2
-- @
class Substitutable a where
applySubst :: Subst -> a -> a
instance (Substitutable a, Substitutable b) => Substitutable (a,b) where
applySubst s (x,y) = (applySubst s x, applySubst s y)
-- | @'applySubst' s1 s2@ applies one substitution to another, replacing all the
-- bindings in the second argument @s2@ with their values mentioned in the first
-- one (@s1@).
instance Substitutable Subst where
applySubst s (Subst target) = Subst (fmap (applySubst s) target)
-- | >>> :{
-- putPprLn (Subst
-- [ ("a", TFun "b" "b")
-- , ("b", TEither "c" "d") ])
-- :}
-- { a > b → b
-- , b > Either c d }
instance Pretty Subst where
ppr (Subst s) = "{ " <> T.intercalate "\n, " [ ppr k <> " > " <> ppr v | (k,v) <- M.toList s ] <> " }"
-- | Combine two substitutions by applying all substitutions mentioned in the
-- first argument to the type variables contained in the second.
instance Monoid Subst where
-- Considering that all we can really do with a substitution is apply it, we
-- can use the one of 'Substitutable's laws to show that substitutions
-- combine associatively,
--
-- @
-- applySubst (compose s1 (compose s2 s3))
-- = applySubst s1 . applySubst (compose s2 s3)
-- = applySubst s1 . applySubst s2 . applySubst s3
-- = applySubst (compose s1 s2) . applySubst s3
-- = applySubst (compose (compose s1 s2) s3)
-- @
mappend subst1 subst2 = Subst (s1 `M.union` s2)
where
Subst s1 = subst1
Subst s2 = applySubst subst1 subst2
mempty = Subst M.empty
-- #############################################################################
-- #############################################################################
-- * Typechecking
-- #############################################################################
-- #############################################################################
-- $ Typechecking does two things:
--
-- 1. If two types are not immediately identical, attempt to 'unify' them
-- to get a type compatible with both of them
-- 2. 'infer' the most general type of a value by comparing the values in its
-- definition with the 'Env'ironment
-- #############################################################################
-- ** Inference context
-- #############################################################################
-- | The inference type holds a supply of unique names, and can fail with a
-- descriptive error if something goes wrong.
--
-- /Invariant:/ the supply must be infinite, or we might run out of names to
-- give to things.
newtype Infer a = Infer (ExceptT InferError (State [Name]) a)
deriving (Functor, Applicative, Monad)
-- | Errors that can happen during the type inference process.
data InferError =
-- | Two types that don't match were attempted to be unified.
--
-- For example, @a -> a@ and @Int@ do not unify.
--
-- >>> putPprLn (CannotUnify (TFun "a" "a") (TConst "Int"))
-- Cannot unify a → a with Int
CannotUnify MType MType
-- | A 'TVar' is bound to an 'MType' that already contains it.
--
-- The canonical example of this is @λx. x x@, where the first @x@
-- in the body has to have type @a -> b@, and the second one @a@. Since
-- they're both the same @x@, this requires unification of @a@ with
-- @a -> b@, which only works if @a = a -> b = (a -> b) -> b = …@, yielding
-- an infinite type.
--
-- >>> putPprLn (OccursCheckFailed "a" (TFun "a" "a"))
-- Occurs check failed: a already appears in a → a
| OccursCheckFailed Name MType
-- | The value of an unknown identifier was read.
--
-- >>> putPprLn (UnknownIdentifier "a")
-- Unknown identifier: a
| UnknownIdentifier Name
deriving Show
-- | >>> putPprLn (CannotUnify (TEither "a" "b") (TTuple "a" "b"))
-- Cannot unify Either a b with (a, b)
instance Pretty InferError where
ppr = \case
CannotUnify t1 t2 ->
"Cannot unify " <> ppr t1 <> " with " <> ppr t2
OccursCheckFailed name ty ->
"Occurs check failed: " <> ppr name <> " already appears in " <> ppr ty
UnknownIdentifier name ->
"Unknown identifier: " <> ppr name
-- | Evaluate a value in an 'Infer'ence context.
--
-- >>> let expr = EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x"))))
-- >>> putPprLn expr
-- λf g x. f x (g x)
-- >>> let inferred = runInfer (infer (Env []) expr)
-- >>> let demonstrate = \case Right (_, ty) -> T.putStrLn (":: " <> ppr ty)
-- >>> demonstrate inferred
-- :: (c → e → f) → (c → e) → c → f
runInfer :: Infer a -- ^ Inference data
-> Either InferError a
runInfer (Infer inf) =
evalState (runExceptT inf) (map Name (infiniteSupply alphabet))
where
alphabet = map T.singleton ['a'..'z']
-- [a, b, c] ==> [a,b,c, a1,b1,c1, a2,b2,c2, …]
infiniteSupply supply = supply <> addSuffixes supply (1 :: Integer)
where
addSuffixes xs n = map (\x -> addSuffix x n) xs <> addSuffixes xs (n+1)
addSuffix x n = x <> T.pack (show n)
-- | Throw an 'InferError' in an 'Infer'ence context.
--
-- >>> case runInfer (throw (UnknownIdentifier "var")) of Left err -> putPprLn err
-- Unknown identifier: var
throw :: InferError -> Infer a
throw = Infer . throwE
-- #############################################################################
-- ** Unification
-- #############################################################################
-- $ Unification describes the process of making two different types compatible
-- by specializing them where needed. A desirable property to have here is being
-- able to find the most general unifier. Luckily, we'll be able to do that in
-- our type system.
-- | The unification of two 'MType's is the most general substituion that can be
-- applied to both of them in order to yield the same result.
--
-- >>> let m1 = TFun "a" "b"
-- >>> putPprLn m1
-- a → b
-- >>> let m2 = TFun "c" (TEither "d" "e")
-- >>> putPprLn m2
-- c → Either d e
-- >>> let inferSubst = unify (m1, m2)
-- >>> case runInfer inferSubst of Right subst -> putPprLn subst
-- { a > c
-- , b > Either d e }
unify :: (MType, MType) -> Infer Subst
unify = \case
(TFun a b, TFun x y) -> unifyBinary (a,b) (x,y)
(TVar v, x) -> v `bindVariableTo` x
(x, TVar v) -> v `bindVariableTo` x
(TConst a, TConst b) | a == b -> pure mempty
(TList a, TList b) -> unify (a,b)
(TEither a b, TEither x y) -> unifyBinary (a,b) (x,y)
(TTuple a b, TTuple x y) -> unifyBinary (a,b) (x,y)
(a, b) -> throw (CannotUnify a b)
where
-- Unification of binary type constructors, such as functions and Either.
-- Unification is first done for the first operand, and assuming the
-- required substitution, for the second one.
unifyBinary :: (MType, MType) -> (MType, MType) -> Infer Subst
unifyBinary (a,b) (x,y) = do
s1 <- unify (a, x)
s2 <- unify (applySubst s1 (b, y))
pure (s1 <> s2)
-- | Build a 'Subst'itution that binds a 'Name' of a 'TVar' to an 'MType'. The
-- resulting substitution should be idempotent, i.e. applying it more than once
-- to something should not be any different from applying it only once.
--
-- - In the simplest case, this just means building a substitution that just
-- does that.
-- - Substituting a 'Name' with a 'TVar' with the same name unifies a type
-- variable with itself, and the resulting substitution does nothing new.
-- - If the 'Name' we're trying to bind to an 'MType' already occurs in that
-- 'MType', the resulting substitution would not be idempotent: the 'MType'
-- would be replaced again, yielding a different result. This is known as the
-- Occurs Check.
bindVariableTo :: Name -> MType -> Infer Subst
bindVariableTo name (TVar v) | boundToSelf = pure mempty
where
boundToSelf = name == v
bindVariableTo name mType | name `occursIn` mType = throw (OccursCheckFailed name mType)
where
n `occursIn` ty = n `S.member` freeMType ty
bindVariableTo name mType = pure (Subst (M.singleton name mType))
-- #############################################################################
-- ** Type inference
-- #############################################################################
-- $ Type inference is the act of finding out a value's type by looking at the
-- environment it is in, in order to make it compatible with it.
--
-- In literature, the Hindley-Damas-Milner inference algorithm ("Algorithm W")
-- is often presented in the style of logical formulas, and below you'll find
-- that version along with code that actually does what they say.
--
-- These formulas look a bit like fractions, where the "numerator" is a
-- collection of premises, and the denominator is the consequence if all of them
-- hold.
--
-- __Example:__
--
-- @
-- Γ ⊢ even : Int → Bool Γ ⊢ 1 : Int
--
-- Γ ⊢ even 1 : Bool
-- @
--
-- means that if we have a value of type @Int -> Bool@ called "even" and a value
-- of type @Int@ called @1@, then we also have a value of type @Bool@ via
-- @even 1@ available to us.
--
-- The actual inference rules are polymorphic versions of this example, and
-- the code comments will explain each step in detail.
-- -----------------------------------------------------------------------------
-- *** The language: typed lambda calculus
-- -----------------------------------------------------------------------------
-- | The syntax tree of the language we'd like to typecheck. You can view it as
-- a close relative to simply typed lambda calculus, having only the most
-- necessary syntax elements.
--
-- Since 'ELet' is non-recursive, the usual fixed-point function
-- @fix : (a → a) → a@ can be introduced to allow recursive definitions.
data Exp = ELit Lit -- ^ True, 1
| EVar Name -- ^ @x@
| EApp Exp Exp -- ^ @f x@
| EAbs Name Exp -- ^ @λx. e@
| ELet Name Exp Exp -- ^ @let x = e in e'@ (non-recursive)
deriving Show
-- | Literals we'd like to support. Since we can't define new data types in our
-- simple type system, we'll have to hard-code the possible ones here.
data Lit = LBool Bool
| LInteger Integer
deriving Show
-- | >>> putPprLn (EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x")))))
-- λf g x. f x (g x)
instance Pretty Exp where
ppr (ELit lit) = ppr lit
ppr (EVar name) = ppr name
ppr (EApp f x) = pprApp1 f <> " " <> pprApp2 x
where
pprApp1 = \case
eLet@ELet{} -> "(" <> ppr eLet <> ")"
eLet@EAbs{} -> "(" <> ppr eLet <> ")"
e -> ppr e
pprApp2 = \case
eApp@EApp{} -> "(" <> ppr eApp <> ")"
e -> pprApp1 e
ppr x@EAbs{} = pprAbs True x
where
pprAbs True (EAbs name expr) = "λ" <> ppr name <> pprAbs False expr
pprAbs False (EAbs name expr) = " " <> ppr name <> pprAbs False expr
pprAbs _ expr = ". " <> ppr expr
ppr (ELet name value body) =
"let " <> ppr name <> " = " <> ppr value <> " in " <> ppr body
-- | >>> putPprLn (LBool True)
-- True
--
-- >>> putPprLn (LInteger 127)
-- 127
instance Pretty Lit where
ppr = \case
LBool b -> showT b
LInteger i -> showT i
where
showT :: Show a => a -> Text
showT = T.pack . show
-- | >>> "var" :: Exp
-- EVar (Name "var")
instance IsString Exp where
fromString = EVar . fromString
-- -----------------------------------------------------------------------------
-- *** Some useful definitions
-- -----------------------------------------------------------------------------
-- | Generate a fresh 'Name' in a type 'Infer'ence context. An example use case
-- of this is η expansion, which transforms @f@ into @λx. f x@, where "x" is a
-- new name, i.e. unbound in the current context.
fresh :: Infer MType
fresh = drawFromSupply >>= \case
Right name -> pure (TVar name)
Left err -> throw err
where
drawFromSupply :: Infer (Either InferError Name)
drawFromSupply = Infer (do
s:upply <- lift get
lift (put upply)
pure (Right s) )
-- | Add a new binding to the environment.
--
-- The Haskell equivalent would be defining a new value, for example in module
-- scope or in a @let@ block. This corresponds to the "comma" operation used in
-- formal notation,
--
-- @
-- Γ, x:σ ≡ extendEnv Γ (x,σ)
-- @
extendEnv :: Env -> (Name, PType) -> Env
extendEnv (Env env) (name, pType) = Env (M.insert name pType env)
-- -----------------------------------------------------------------------------
-- *** Inferring the types of all language constructs
-- -----------------------------------------------------------------------------
-- | Infer the type of an 'Exp'ression in an 'Env'ironment, resulting in the
-- 'Exp's 'MType' along with a substitution that has to be done in order to reach
-- this goal.
--
-- This is widely known as /Algorithm W/.
infer :: Env -> Exp -> Infer (Subst, MType)
infer env = \case
ELit lit -> inferLit lit
EVar name -> inferVar env name
EApp f x -> inferApp env f x
EAbs x e -> inferAbs env x e
ELet x e e' -> inferLet env x e e'
-- | Literals such as 'True' and '1' have their types hard-coded.
inferLit :: Lit -> Infer (Subst, MType)
inferLit lit = pure (mempty, TConst litTy)
where
litTy = case lit of
LBool {} -> "Bool"
LInteger {} -> "Integer"
-- | Inferring the type of a variable is done via
--
-- @
-- x:σ ∈ Γ τ = instantiate(σ)
-- [Var]
-- Γ ⊢ x:τ
-- @
--
-- This means that if @Γ@ /literally contains/ (@∈@) a value, then it also
-- /entails it/ (@⊢@) in all its instantiations.
inferVar :: Env -> Name -> Infer (Subst, MType)
inferVar env name = do
sigma <- lookupEnv env name -- x:σ ∈ Γ
tau <- instantiate sigma -- τ = instantiate(σ)
-- ------------------
pure (mempty, tau) -- Γ ⊢ x:τ
-- | Look up the 'PType' of a 'Name' in the 'Env'ironment.
--
-- This checks whether @x:σ@ is /literally contained/ in @Γ@. For more details
-- about this, see the documentation of 'Env'.
--
-- To give a Haskell analogon, looking up @id@ when @Prelude@ is loaded, the
-- resulting 'PType' would be @id@'s type, namely @forall a. a -> a@.
lookupEnv :: Env -> Name -> Infer PType
lookupEnv (Env env) name = case M.lookup name env of
Just x -> pure x
Nothing -> throw (UnknownIdentifier name)
-- | Bind all quantified variables of a 'PType' to 'fresh' type variables.
--
-- __Example:__ instantiating @forall a. a -> b -> a@ results in the 'MType'
-- @c -> b -> c@, where @c@ is a fresh name (to avoid shadowing issues).
--
-- You can picture the 'PType' to be the prototype converted to an instantiated
-- 'MType', which can now be used in the unification process.
--
-- Another way of looking at it is by simply forgetting which variables were
-- quantified, carefully avoiding name clashes when doing so.
--
-- 'instantiate' can also be seen as the opposite of 'generalize', which we'll
-- need later to convert an 'MType' to a 'PType'.
instantiate :: PType -> Infer MType
instantiate (Forall qs t) = do
subst <- substituteAllWithFresh qs
pure (applySubst subst t)
where
-- For each given name, add a substitution from that name to a fresh type
-- variable to the result.
substituteAllWithFresh :: Set Name -> Infer Subst
substituteAllWithFresh xs = do
let freshSubstActions = M.fromSet (const fresh) xs
freshSubsts <- sequenceA freshSubstActions
pure (Subst freshSubsts)
-- | Function application captures the fact that if we have a function and an
-- argument we can give to that function, we also have the result value of the
-- result type available to us.
--
-- @
-- Γ ⊢ f : fτ Γ ⊢ x : xτ fxτ = fresh unify(fτ, xτ → fxτ)
-- [App]
-- Γ ⊢ f x : fxτ
-- @
--
-- This rule says that given a function and a value with a type, the function
-- type has to unify with a function type that allows the value type to be its
-- argument.
inferApp
:: Env
-> Exp -- ^ __f__ x
-> Exp -- ^ f __x__
-> Infer (Subst, MType)
inferApp env f x = do
(s1, fTau) <- infer env f -- f : fτ
(s2, xTau) <- infer (applySubst s1 env) x -- x : xτ
fxTau <- fresh -- fxτ = fresh
s3 <- unify (applySubst s2 fTau, TFun xTau fxTau) -- unify (fτ, xτ → fxτ)
let s = s3 <> s2 <> s1 -- --------------------
pure (s, applySubst s3 fxTau) -- f x : fxτ
-- | Lambda abstraction is based on the fact that when we introduce a new
-- variable, the resulting lambda maps from that variable's type to the type of
-- the body.
--
-- @
-- τ = fresh σ = ∀∅. τ Γ, x:σ ⊢ e:τ'
-- [Abs]
-- Γ ⊢ λx.e : τ→τ'
-- @
--
-- Here, @Γ, x:τ@ is @Γ@ extended by one additional mapping, namely @x:τ@.
--
-- Abstraction is typed by extending the environment by a new 'MType', and if
-- under this assumption we can construct a function mapping to a value of that
-- type, we can say that the lambda takes a value and maps to it.
inferAbs
:: Env
-> Name -- ^ λ__x__. e
-> Exp -- ^ λx. __e__
-> Infer (Subst, MType)
inferAbs env x e = do
tau <- fresh -- τ = fresh
let sigma = Forall [] tau -- σ = ∀∅. τ
env' = extendEnv env (x, sigma) -- Γ, x:σ
(s, tau') <- infer env' e -- … ⊢ e:τ'
-- ---------------
pure (s, TFun (applySubst s tau) tau') -- λx.e : τ→τ'
-- | A let binding allows extending the environment with new bindings in a
-- principled manner. To do this, we first have to typecheck the expression to
-- be introduced. The result of this is then generalized to a 'PType', since let
-- bindings introduce new polymorphic values, which are then added to the
-- environment. Now we can finally typecheck the body of the "in" part of the
-- let binding.
--
-- Note that in our simple language, let is non-recursive, but recursion can be
-- introduced as usual by adding a primitive @fix : (a → a) → a@ if desired.
--
-- @
-- Γ ⊢ e:τ σ = gen(Γ,τ) Γ, x:σ ⊢ e':τ'
-- [Let]
-- Γ ⊢ let x = e in e' : τ'
-- @
inferLet
:: Env
-> Name -- ^ let __x__ = e in e'
-> Exp -- ^ let x = __e__ in e'
-> Exp -- ^ let x = e in __e'__
-> Infer (Subst, MType)
inferLet env x e e' = do
(s1, tau) <- infer env e -- Γ ⊢ e:τ
let env' = applySubst s1 env
let sigma = generalize env' tau -- σ = gen(Γ,τ)
let env'' = extendEnv env' (x, sigma) -- Γ, x:σ
(s2, tau') <- infer env'' e' -- Γ ⊢ …
-- --------------------------
pure (s2 <> s1, tau') -- … let x = e in e' : τ'
-- | Generalize an 'MType' to a 'PType' by universally quantifying over all the
-- type variables contained in it, except those already free in the environment.
--
-- >>> let tau = TFun "a" (TFun "b" "a")
-- >>> putPprLn tau
-- a → b → a
-- >>> putPprLn (generalize (Env [("x", Forall [] "b")]) tau)
-- ∀a. a → b → a
--
-- In more formal notation,
--
-- @
-- gen(Γ,τ) = ∀{α}. τ
-- where {α} = free(τ) free(Γ)
-- @
--
-- 'generalize' can also be seen as the opposite of 'instantiate', which
-- converts a 'PType' to an 'MType'.
generalize :: Env -> MType -> PType
generalize env mType = Forall qs mType
where
qs = freeMType mType `S.difference` freeEnv env