{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} -- | This module is an extensively documented walkthrough for typechecking a -- basic functional language using the Hindley-Damas-Milner algorithm. -- -- In the end, we'll be able to infer the type of expressions like -- -- @ -- find (λx. (>) x 0) -- :: [Integer] -> Either () Integer -- @ -- -- It can be used in multiple different forms: -- -- * The source is written in literate programming style, so you can almost -- read it from top to bottom, minus some few references to later topics. -- * /Loads/ of doctests (runnable and verified code examples) are included -- * The code is runnable in GHCi, all definitions are exposed. -- * A small main module that gives many examples of what you might try out in -- GHCi is also included. -- * The Haddock output yields a nice overview over the definitions given, with -- a nice rendering of a truckload of Haddock comments. module HindleyMilner where import Control.Monad.Trans import Control.Monad.Trans.Except import Control.Monad.Trans.State import Data.Map (Map) import qualified Data.Map as M import Data.Monoid import Data.Set (Set) import qualified Data.Set as S import Data.String import Data.Text (Text) import qualified Data.Text as T -- $setup -- -- For running doctests: -- -- >>> :set -XOverloadedStrings -- >>> :set -XOverloadedLists -- >>> :set -XLambdaCase -- >>> import qualified Data.Text.IO as T -- >>> let putPprLn = T.putStrLn . ppr -- ############################################################################# -- ############################################################################# -- * Preliminaries -- ############################################################################# -- ############################################################################# -- ############################################################################# -- ** Prettyprinting -- ############################################################################# -- | A prettyprinter class. Similar to 'Show', but with a focus on having -- human-readable output as opposed to being valid Haskell. class Pretty a where ppr :: a -> Text -- ############################################################################# -- ** Names -- ############################################################################# -- | A 'name' is an identifier in the language we're going to typecheck. -- Variables on both the term and type level have 'Name's, for example. newtype Name = Name Text deriving (Eq, Ord, Show) -- | >>> "lorem" :: Name -- Name "lorem" instance IsString Name where fromString = Name . T.pack -- | >>> putPprLn (Name "var") -- var instance Pretty Name where ppr (Name n) = n -- ############################################################################# -- ** Monotypes -- ############################################################################# -- | A monotype is an unquantified/unparametric type, in other words it contains -- no @forall@s. Monotypes are the inner building blocks of all types. Examples -- of monotypes are @Int@, @a@, @a -> b@. -- -- In formal notation, 'MType's are often called τ (tau) types. data MType = TVar Name -- ^ @a@ | TFun MType MType -- ^ @a -> b@ | TConst Name -- ^ @Int@, @()@, … -- Since we can't declare our own types in our simple type system -- here, we'll hard-code certain basic ones so we can typecheck some -- familar functions that use them later. | TList MType -- ^ @[a]@ | TEither MType MType -- ^ @Either a b@ | TTuple MType MType -- ^ @(a,b)@ deriving Show -- | >>> putPprLn (TFun (TEither (TVar "a") (TVar "b")) (TFun (TVar "c") (TVar "d"))) -- Either a b → c → d -- -- Using the 'IsString' instance: -- -- >>> putPprLn (TFun (TEither "a" "b") (TFun "c" "d")) -- Either a b → c → d instance Pretty MType where ppr = go False where go _ (TVar name) = ppr name go _ (TList a) = "[" <> ppr a <> "]" go _ (TEither l r) = "Either " <> ppr l <> " " <> ppr r go _ (TTuple a b) = "(" <> ppr a <> ", " <> ppr b <> ")" go _ (TConst name) = ppr name go parenthesize (TFun a b) | parenthesize = "(" <> lhs <> " → " <> rhs <> ")" | otherwise = lhs <> " → " <> rhs where lhs = go True a rhs = go False b -- | >>> "var" :: MType -- TVar (Name "var") instance IsString MType where fromString = TVar . fromString -- | The free variables of an 'MType'. This is simply the collection of all the -- individual type variables occurring inside of it. -- -- __Example:__ The free variables of @a -> b@ are @a@ and @b@. freeMType :: MType -> Set Name freeMType = \case TVar a -> [a] TFun a b -> freeMType a <> freeMType b TList a -> freeMType a TEither l r -> freeMType l <> freeMType r TTuple a b -> freeMType a <> freeMType b TConst _ -> [] -- | Substitute all the contained type variables mentioned in the substitution, -- and leave everything else alone. instance Substitutable MType where applySubst s = \case TVar a -> let Subst s' = s in M.findWithDefault (TVar a) a s' TFun f x -> TFun (applySubst s f) (applySubst s x) TList a -> TList (applySubst s a) TEither l r -> TEither (applySubst s l) (applySubst s r) TTuple a b -> TTuple (applySubst s a) (applySubst s b) c@TConst {} -> c -- ############################################################################# -- ** Polytypes -- ############################################################################# -- | A polytype is a monotype universally quantified over a number of type -- variables. In Haskell, all definitions have polytypes, but since the @forall@ -- is implicit they look a bit like monotypes, maybe confusingly so. For -- example, the type of @1 :: Int@ is actually @forall . Int@, and the -- type of @id@ is @forall a. a -> a@, although GHC displays it as @a -> a@. -- -- A polytype claims to work "for all imaginable type parameters", very similar -- to how a lambda claims to work "for all imaginable value parameters". We can -- insert a value into a lambda's parameter to evaluate it to a new value, and -- similarly we'll later insert types into a polytype's quantified variables to -- gain new types. -- -- __Example:__ in a definition @id :: forall a. a -> a@, the @a@ after the -- ∀ ("forall") is the collection of type variables, and @a -> a@ is the 'MType' -- quantified over. When we have such an @id@, we also have its specialized -- version @Int -> Int@ available. This process will be the topic of the type -- inference/unification algorithms. -- -- In formal notation, 'PType's are often called σ (sigma) types. -- -- The purpose of having monotypes and polytypes is that we'd like to only have -- universal quantification at the top level, restricting our language to rank-1 -- polymorphism, where type inferece is total (all types can be inferred) and -- simple (only a handful of typing rules). Weakening this constraint would be -- easy: if we allowed universal quantification within function types we would -- get rank-N polymorphism. Taking it even further to allow it anywhere, -- effectively replacing all occurrences of 'MType' with 'PType', yields -- impredicative types. Both these extensions make the type system -- *significantly* more complex though. data PType = Forall (Set Name) MType -- ^ ∀{α}. τ -- | >>> putPprLn (Forall ["a"] (TFun "a" "a")) -- ∀a. a → a instance Pretty PType where ppr (Forall qs mType) = "∀" <> pprUniversals <> ". " <> ppr mType where pprUniversals | S.null qs = "∅" | otherwise = (T.intercalate " " . map ppr . S.toList) qs -- | The free variables of a 'PType' are the free variables of the contained -- 'MType', except those universally quantified. -- -- >>> let sigma = Forall ["a"] (TFun "a" (TFun (TTuple "b" "a") "c")) -- >>> putPprLn sigma -- ∀a. a → (b, a) → c -- >>> let display = T.putStrLn . T.intercalate ", " . foldMap (\x -> [ppr x]) -- >>> display (freePType sigma) -- b, c freePType :: PType -> Set Name freePType (Forall qs mType) = freeMType mType `S.difference` qs -- | Substitute all the free type variables. instance Substitutable PType where applySubst (Subst subst) (Forall qs mType) = let qs' = M.fromSet (const ()) qs subst' = Subst (subst `M.difference` qs') in Forall qs (applySubst subst' mType) -- ############################################################################# -- ** The environment -- ############################################################################# -- | The environment consists of all the values available in scope, and their -- associated polytypes. Other common names for it include "(typing) context", -- and because of the commonly used symbol for it sometimes directly -- \"Gamma"/@"Γ"@. -- -- There are two kinds of membership in an environment, -- -- - @∈@: an environment @Γ@ can be viewed as a set of @(value, type)@ pairs, -- and we can test whether something is /literally contained/ by it via -- x:σ ∈ Γ -- - @⊢@, pronounced /entails/, describes all the things that are well-typed, -- given an environment @Γ@. @Γ ⊢ x:τ@ can thus be seen as a judgement that -- @x:τ@ is /figuratively contained/ in @Γ@. -- -- For example, the environment @{x:Int}@ literally contains @x@, but given -- this, it also entails @λy. x@, @λy z. x@, @let id = λy. y in id x@ and so on. -- -- In Haskell terms, the environment consists of all the things you currently -- have available, or that can be built by comining them. If you import the -- Prelude, your environment entails -- -- @ -- id → ∀a. a→a -- map → ∀a b. (a→b) → [a] → [b] -- putStrLn → ∀∅. String → IO () -- … -- id map → ∀a b. (a→b) → [a] → [b] -- map putStrLn → ∀∅. [String] -> [IO ()] -- … -- @ newtype Env = Env (Map Name PType) -- | >>> :{ -- putPprLn (Env -- [ ("id", Forall ["a"] (TFun "a" "a")) -- , ("const", Forall ["a", "b"] (TFun "a" (TFun "b" "a"))) ]) -- :} -- Γ = { const : ∀a b. a → b → a -- , id : ∀a. a → a } instance Pretty Env where ppr (Env env) = "Γ = { " <> T.intercalate "\n , " pprBindings <> " }" where bindings = M.assocs env pprBinding (name, pType) = ppr name <> " : " <> ppr pType pprBindings = map pprBinding bindings -- | The free variables of an 'Env'ironment are all the free variables of the -- 'PType's it contains. freeEnv :: Env -> Set Name freeEnv (Env env) = let allPTypes = M.elems env in S.unions (map freePType allPTypes) -- | Performing a 'Subst'itution in an 'Env'ironment means performing that -- substituion on all the contained 'PType's. instance Substitutable Env where applySubst s (Env env) = Env (M.map (applySubst s) env) -- ############################################################################# -- ** Substitutions -- ############################################################################# -- | A substitution is a mapping from type variables to 'MType's. Applying a -- substitution means applying those replacements. For example, the substitution -- @a -> Int@ applied to @a -> a@ yields the result @Int -> Int@. -- -- A key concept behind Hindley-Milner is that once we dive deeper into an -- expression, we learn more about our type variables. We might learn that @a@ -- has to be specialized to @b -> b@, and then later on that @b@ is actually -- @Int@. Substitutions are an organized way of carrying this information along. newtype Subst = Subst (Map Name MType) -- | We're going to apply substitutions to a variety of other values that -- somehow contain type variables, so we overload this application operation in -- a class here. -- -- Laws: -- -- @ -- 'applySubst' 'mempty' ≡ 'id' -- 'applySubst' (s1 '<>' s2) ≡ 'applySubst' s1 . 'applySubst' s2 -- @ class Substitutable a where applySubst :: Subst -> a -> a instance (Substitutable a, Substitutable b) => Substitutable (a,b) where applySubst s (x,y) = (applySubst s x, applySubst s y) -- | @'applySubst' s1 s2@ applies one substitution to another, replacing all the -- bindings in the second argument @s2@ with their values mentioned in the first -- one (@s1@). instance Substitutable Subst where applySubst s (Subst target) = Subst (fmap (applySubst s) target) -- | >>> :{ -- putPprLn (Subst -- [ ("a", TFun "b" "b") -- , ("b", TEither "c" "d") ]) -- :} -- { a ––> b → b -- , b ––> Either c d } instance Pretty Subst where ppr (Subst s) = "{ " <> T.intercalate "\n, " [ ppr k <> " ––> " <> ppr v | (k,v) <- M.toList s ] <> " }" -- | Combine two substitutions by applying all substitutions mentioned in the -- first argument to the type variables contained in the second. instance Monoid Subst where -- Considering that all we can really do with a substitution is apply it, we -- can use the one of 'Substitutable's laws to show that substitutions -- combine associatively, -- -- @ -- applySubst (compose s1 (compose s2 s3)) -- = applySubst s1 . applySubst (compose s2 s3) -- = applySubst s1 . applySubst s2 . applySubst s3 -- = applySubst (compose s1 s2) . applySubst s3 -- = applySubst (compose (compose s1 s2) s3) -- @ mappend subst1 subst2 = Subst (s1 `M.union` s2) where Subst s1 = subst1 Subst s2 = applySubst subst1 subst2 mempty = Subst M.empty -- ############################################################################# -- ############################################################################# -- * Typechecking -- ############################################################################# -- ############################################################################# -- $ Typechecking does two things: -- -- 1. If two types are not immediately identical, attempt to 'unify' them -- to get a type compatible with both of them -- 2. 'infer' the most general type of a value by comparing the values in its -- definition with the 'Env'ironment -- ############################################################################# -- ** Inference context -- ############################################################################# -- | The inference type holds a supply of unique names, and can fail with a -- descriptive error if something goes wrong. -- -- /Invariant:/ the supply must be infinite, or we might run out of names to -- give to things. newtype Infer a = Infer (ExceptT InferError (State [Name]) a) deriving (Functor, Applicative, Monad) -- | Errors that can happen during the type inference process. data InferError = -- | Two types that don't match were attempted to be unified. -- -- For example, @a -> a@ and @Int@ do not unify. -- -- >>> putPprLn (CannotUnify (TFun "a" "a") (TConst "Int")) -- Cannot unify a → a with Int CannotUnify MType MType -- | A 'TVar' is bound to an 'MType' that already contains it. -- -- The canonical example of this is @λx. x x@, where the first @x@ -- in the body has to have type @a -> b@, and the second one @a@. Since -- they're both the same @x@, this requires unification of @a@ with -- @a -> b@, which only works if @a = a -> b = (a -> b) -> b = …@, yielding -- an infinite type. -- -- >>> putPprLn (OccursCheckFailed "a" (TFun "a" "a")) -- Occurs check failed: a already appears in a → a | OccursCheckFailed Name MType -- | The value of an unknown identifier was read. -- -- >>> putPprLn (UnknownIdentifier "a") -- Unknown identifier: a | UnknownIdentifier Name deriving Show -- | >>> putPprLn (CannotUnify (TEither "a" "b") (TTuple "a" "b")) -- Cannot unify Either a b with (a, b) instance Pretty InferError where ppr = \case CannotUnify t1 t2 -> "Cannot unify " <> ppr t1 <> " with " <> ppr t2 OccursCheckFailed name ty -> "Occurs check failed: " <> ppr name <> " already appears in " <> ppr ty UnknownIdentifier name -> "Unknown identifier: " <> ppr name -- | Evaluate a value in an 'Infer'ence context. -- -- >>> let expr = EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x")))) -- >>> putPprLn expr -- λf g x. f x (g x) -- >>> let inferred = runInfer (infer (Env []) expr) -- >>> let demonstrate = \case Right (_, ty) -> T.putStrLn (":: " <> ppr ty) -- >>> demonstrate inferred -- :: (c → e → f) → (c → e) → c → f runInfer :: Infer a -- ^ Inference data -> Either InferError a runInfer (Infer inf) = evalState (runExceptT inf) (map Name (infiniteSupply alphabet)) where alphabet = map T.singleton ['a'..'z'] -- [a, b, c] ==> [a,b,c, a1,b1,c1, a2,b2,c2, …] infiniteSupply supply = supply <> addSuffixes supply (1 :: Integer) where addSuffixes xs n = map (\x -> addSuffix x n) xs <> addSuffixes xs (n+1) addSuffix x n = x <> T.pack (show n) -- | Throw an 'InferError' in an 'Infer'ence context. -- -- >>> case runInfer (throw (UnknownIdentifier "var")) of Left err -> putPprLn err -- Unknown identifier: var throw :: InferError -> Infer a throw = Infer . throwE -- ############################################################################# -- ** Unification -- ############################################################################# -- $ Unification describes the process of making two different types compatible -- by specializing them where needed. A desirable property to have here is being -- able to find the most general unifier. Luckily, we'll be able to do that in -- our type system. -- | The unification of two 'MType's is the most general substituion that can be -- applied to both of them in order to yield the same result. -- -- >>> let m1 = TFun "a" "b" -- >>> putPprLn m1 -- a → b -- >>> let m2 = TFun "c" (TEither "d" "e") -- >>> putPprLn m2 -- c → Either d e -- >>> let inferSubst = unify (m1, m2) -- >>> case runInfer inferSubst of Right subst -> putPprLn subst -- { a ––> c -- , b ––> Either d e } unify :: (MType, MType) -> Infer Subst unify = \case (TFun a b, TFun x y) -> unifyBinary (a,b) (x,y) (TVar v, x) -> v `bindVariableTo` x (x, TVar v) -> v `bindVariableTo` x (TConst a, TConst b) | a == b -> pure mempty (TList a, TList b) -> unify (a,b) (TEither a b, TEither x y) -> unifyBinary (a,b) (x,y) (TTuple a b, TTuple x y) -> unifyBinary (a,b) (x,y) (a, b) -> throw (CannotUnify a b) where -- Unification of binary type constructors, such as functions and Either. -- Unification is first done for the first operand, and assuming the -- required substitution, for the second one. unifyBinary :: (MType, MType) -> (MType, MType) -> Infer Subst unifyBinary (a,b) (x,y) = do s1 <- unify (a, x) s2 <- unify (applySubst s1 (b, y)) pure (s1 <> s2) -- | Build a 'Subst'itution that binds a 'Name' of a 'TVar' to an 'MType'. The -- resulting substitution should be idempotent, i.e. applying it more than once -- to something should not be any different from applying it only once. -- -- - In the simplest case, this just means building a substitution that just -- does that. -- - Substituting a 'Name' with a 'TVar' with the same name unifies a type -- variable with itself, and the resulting substitution does nothing new. -- - If the 'Name' we're trying to bind to an 'MType' already occurs in that -- 'MType', the resulting substitution would not be idempotent: the 'MType' -- would be replaced again, yielding a different result. This is known as the -- Occurs Check. bindVariableTo :: Name -> MType -> Infer Subst bindVariableTo name (TVar v) | boundToSelf = pure mempty where boundToSelf = name == v bindVariableTo name mType | name `occursIn` mType = throw (OccursCheckFailed name mType) where n `occursIn` ty = n `S.member` freeMType ty bindVariableTo name mType = pure (Subst (M.singleton name mType)) -- ############################################################################# -- ** Type inference -- ############################################################################# -- $ Type inference is the act of finding out a value's type by looking at the -- environment it is in, in order to make it compatible with it. -- -- In literature, the Hindley-Damas-Milner inference algorithm ("Algorithm W") -- is often presented in the style of logical formulas, and below you'll find -- that version along with code that actually does what they say. -- -- These formulas look a bit like fractions, where the "numerator" is a -- collection of premises, and the denominator is the consequence if all of them -- hold. -- -- __Example:__ -- -- @ -- Γ ⊢ even : Int → Bool Γ ⊢ 1 : Int -- ––––––––––––––––––––––––––––––––––– -- Γ ⊢ even 1 : Bool -- @ -- -- means that if we have a value of type @Int -> Bool@ called "even" and a value -- of type @Int@ called @1@, then we also have a value of type @Bool@ via -- @even 1@ available to us. -- -- The actual inference rules are polymorphic versions of this example, and -- the code comments will explain each step in detail. -- ----------------------------------------------------------------------------- -- *** The language: typed lambda calculus -- ----------------------------------------------------------------------------- -- | The syntax tree of the language we'd like to typecheck. You can view it as -- a close relative to simply typed lambda calculus, having only the most -- necessary syntax elements. -- -- Since 'ELet' is non-recursive, the usual fixed-point function -- @fix : (a → a) → a@ can be introduced to allow recursive definitions. data Exp = ELit Lit -- ^ True, 1 | EVar Name -- ^ @x@ | EApp Exp Exp -- ^ @f x@ | EAbs Name Exp -- ^ @λx. e@ | ELet Name Exp Exp -- ^ @let x = e in e'@ (non-recursive) deriving Show -- | Literals we'd like to support. Since we can't define new data types in our -- simple type system, we'll have to hard-code the possible ones here. data Lit = LBool Bool | LInteger Integer deriving Show -- | >>> putPprLn (EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x"))))) -- λf g x. f x (g x) instance Pretty Exp where ppr (ELit lit) = ppr lit ppr (EVar name) = ppr name ppr (EApp f x) = pprApp1 f <> " " <> pprApp2 x where pprApp1 = \case eLet@ELet{} -> "(" <> ppr eLet <> ")" eLet@EAbs{} -> "(" <> ppr eLet <> ")" e -> ppr e pprApp2 = \case eApp@EApp{} -> "(" <> ppr eApp <> ")" e -> pprApp1 e ppr x@EAbs{} = pprAbs True x where pprAbs True (EAbs name expr) = "λ" <> ppr name <> pprAbs False expr pprAbs False (EAbs name expr) = " " <> ppr name <> pprAbs False expr pprAbs _ expr = ". " <> ppr expr ppr (ELet name value body) = "let " <> ppr name <> " = " <> ppr value <> " in " <> ppr body -- | >>> putPprLn (LBool True) -- True -- -- >>> putPprLn (LInteger 127) -- 127 instance Pretty Lit where ppr = \case LBool b -> showT b LInteger i -> showT i where showT :: Show a => a -> Text showT = T.pack . show -- | >>> "var" :: Exp -- EVar (Name "var") instance IsString Exp where fromString = EVar . fromString -- ----------------------------------------------------------------------------- -- *** Some useful definitions -- ----------------------------------------------------------------------------- -- | Generate a fresh 'Name' in a type 'Infer'ence context. An example use case -- of this is η expansion, which transforms @f@ into @λx. f x@, where "x" is a -- new name, i.e. unbound in the current context. fresh :: Infer MType fresh = drawFromSupply >>= \case Right name -> pure (TVar name) Left err -> throw err where drawFromSupply :: Infer (Either InferError Name) drawFromSupply = Infer (do s:upply <- lift get lift (put upply) pure (Right s) ) -- | Add a new binding to the environment. -- -- The Haskell equivalent would be defining a new value, for example in module -- scope or in a @let@ block. This corresponds to the "comma" operation used in -- formal notation, -- -- @ -- Γ, x:σ ≡ extendEnv Γ (x,σ) -- @ extendEnv :: Env -> (Name, PType) -> Env extendEnv (Env env) (name, pType) = Env (M.insert name pType env) -- ----------------------------------------------------------------------------- -- *** Inferring the types of all language constructs -- ----------------------------------------------------------------------------- -- | Infer the type of an 'Exp'ression in an 'Env'ironment, resulting in the -- 'Exp's 'MType' along with a substitution that has to be done in order to reach -- this goal. -- -- This is widely known as /Algorithm W/. infer :: Env -> Exp -> Infer (Subst, MType) infer env = \case ELit lit -> inferLit lit EVar name -> inferVar env name EApp f x -> inferApp env f x EAbs x e -> inferAbs env x e ELet x e e' -> inferLet env x e e' -- | Literals such as 'True' and '1' have their types hard-coded. inferLit :: Lit -> Infer (Subst, MType) inferLit lit = pure (mempty, TConst litTy) where litTy = case lit of LBool {} -> "Bool" LInteger {} -> "Integer" -- | Inferring the type of a variable is done via -- -- @ -- x:σ ∈ Γ τ = instantiate(σ) -- –––––––––––––––––––––––––––– [Var] -- Γ ⊢ x:τ -- @ -- -- This means that if @Γ@ /literally contains/ (@∈@) a value, then it also -- /entails it/ (@⊢@) in all its instantiations. inferVar :: Env -> Name -> Infer (Subst, MType) inferVar env name = do sigma <- lookupEnv env name -- x:σ ∈ Γ tau <- instantiate sigma -- τ = instantiate(σ) -- ------------------ pure (mempty, tau) -- Γ ⊢ x:τ -- | Look up the 'PType' of a 'Name' in the 'Env'ironment. -- -- This checks whether @x:σ@ is /literally contained/ in @Γ@. For more details -- about this, see the documentation of 'Env'. -- -- To give a Haskell analogon, looking up @id@ when @Prelude@ is loaded, the -- resulting 'PType' would be @id@'s type, namely @forall a. a -> a@. lookupEnv :: Env -> Name -> Infer PType lookupEnv (Env env) name = case M.lookup name env of Just x -> pure x Nothing -> throw (UnknownIdentifier name) -- | Bind all quantified variables of a 'PType' to 'fresh' type variables. -- -- __Example:__ instantiating @forall a. a -> b -> a@ results in the 'MType' -- @c -> b -> c@, where @c@ is a fresh name (to avoid shadowing issues). -- -- You can picture the 'PType' to be the prototype converted to an instantiated -- 'MType', which can now be used in the unification process. -- -- Another way of looking at it is by simply forgetting which variables were -- quantified, carefully avoiding name clashes when doing so. -- -- 'instantiate' can also be seen as the opposite of 'generalize', which we'll -- need later to convert an 'MType' to a 'PType'. instantiate :: PType -> Infer MType instantiate (Forall qs t) = do subst <- substituteAllWithFresh qs pure (applySubst subst t) where -- For each given name, add a substitution from that name to a fresh type -- variable to the result. substituteAllWithFresh :: Set Name -> Infer Subst substituteAllWithFresh xs = do let freshSubstActions = M.fromSet (const fresh) xs freshSubsts <- sequenceA freshSubstActions pure (Subst freshSubsts) -- | Function application captures the fact that if we have a function and an -- argument we can give to that function, we also have the result value of the -- result type available to us. -- -- @ -- Γ ⊢ f : fτ Γ ⊢ x : xτ fxτ = fresh unify(fτ, xτ → fxτ) -- ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––– [App] -- Γ ⊢ f x : fxτ -- @ -- -- This rule says that given a function and a value with a type, the function -- type has to unify with a function type that allows the value type to be its -- argument. inferApp :: Env -> Exp -- ^ __f__ x -> Exp -- ^ f __x__ -> Infer (Subst, MType) inferApp env f x = do (s1, fTau) <- infer env f -- f : fτ (s2, xTau) <- infer (applySubst s1 env) x -- x : xτ fxTau <- fresh -- fxτ = fresh s3 <- unify (applySubst s2 fTau, TFun xTau fxTau) -- unify (fτ, xτ → fxτ) let s = s3 <> s2 <> s1 -- -------------------- pure (s, applySubst s3 fxTau) -- f x : fxτ -- | Lambda abstraction is based on the fact that when we introduce a new -- variable, the resulting lambda maps from that variable's type to the type of -- the body. -- -- @ -- τ = fresh σ = ∀∅. τ Γ, x:σ ⊢ e:τ' -- ––––––––––––––––––––––––––––––––––––– [Abs] -- Γ ⊢ λx.e : τ→τ' -- @ -- -- Here, @Γ, x:τ@ is @Γ@ extended by one additional mapping, namely @x:τ@. -- -- Abstraction is typed by extending the environment by a new 'MType', and if -- under this assumption we can construct a function mapping to a value of that -- type, we can say that the lambda takes a value and maps to it. inferAbs :: Env -> Name -- ^ λ__x__. e -> Exp -- ^ λx. __e__ -> Infer (Subst, MType) inferAbs env x e = do tau <- fresh -- τ = fresh let sigma = Forall [] tau -- σ = ∀∅. τ env' = extendEnv env (x, sigma) -- Γ, x:σ … (s, tau') <- infer env' e -- … ⊢ e:τ' -- --------------- pure (s, TFun (applySubst s tau) tau') -- λx.e : τ→τ' -- | A let binding allows extending the environment with new bindings in a -- principled manner. To do this, we first have to typecheck the expression to -- be introduced. The result of this is then generalized to a 'PType', since let -- bindings introduce new polymorphic values, which are then added to the -- environment. Now we can finally typecheck the body of the "in" part of the -- let binding. -- -- Note that in our simple language, let is non-recursive, but recursion can be -- introduced as usual by adding a primitive @fix : (a → a) → a@ if desired. -- -- @ -- Γ ⊢ e:τ σ = gen(Γ,τ) Γ, x:σ ⊢ e':τ' -- ––––––––––––––––––––––––––––––––––––––– [Let] -- Γ ⊢ let x = e in e' : τ' -- @ inferLet :: Env -> Name -- ^ let __x__ = e in e' -> Exp -- ^ let x = __e__ in e' -> Exp -- ^ let x = e in __e'__ -> Infer (Subst, MType) inferLet env x e e' = do (s1, tau) <- infer env e -- Γ ⊢ e:τ let env' = applySubst s1 env let sigma = generalize env' tau -- σ = gen(Γ,τ) let env'' = extendEnv env' (x, sigma) -- Γ, x:σ (s2, tau') <- infer env'' e' -- Γ ⊢ … -- -------------------------- pure (s2 <> s1, tau') -- … let x = e in e' : τ' -- | Generalize an 'MType' to a 'PType' by universally quantifying over all the -- type variables contained in it, except those already free in the environment. -- -- >>> let tau = TFun "a" (TFun "b" "a") -- >>> putPprLn tau -- a → b → a -- >>> putPprLn (generalize (Env [("x", Forall [] "b")]) tau) -- ∀a. a → b → a -- -- In more formal notation, -- -- @ -- gen(Γ,τ) = ∀{α}. τ -- where {α} = free(τ) – free(Γ) -- @ -- -- 'generalize' can also be seen as the opposite of 'instantiate', which -- converts a 'PType' to an 'MType'. generalize :: Env -> MType -> PType generalize env mType = Forall qs mType where qs = freeMType mType `S.difference` freeEnv env