schala/HindleyMilner.hs

{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase                 #-}
{-# LANGUAGE OverloadedLists            #-}
{-# LANGUAGE OverloadedStrings          #-}


-- | This module is an extensively documented walkthrough for typechecking a
-- basic functional language using the Hindley-Damas-Milner algorithm.
--
-- In the end, we'll be able to infer the type of expressions like
--
-- @
-- find (λx. (>) x 0)
--   :: [Integer] -> Either () Integer
-- @
--
-- It can be used in multiple different forms:
--
--  * The source is written in literate programming style, so you can almost
--    read it from top to bottom, minus some few references to later topics.
--  * /Loads/ of doctests (runnable and verified code examples) are included
--  * The code is runnable in GHCi, all definitions are exposed.
--  * A small main module that gives many examples of what you might try out in
--    GHCi is also included.
--  * The Haddock output yields a nice overview over the definitions given, with
--    a nice rendering of a truckload of Haddock comments.

module HindleyMilner where

import           Control.Monad.Trans
import           Control.Monad.Trans.Except
import           Control.Monad.Trans.State
import           Data.Map                   (Map)
import qualified Data.Map                   as M
import           Data.Monoid
import           Data.Set                   (Set)
import qualified Data.Set                   as S
import           Data.String
import           Data.Text                  (Text)
import qualified Data.Text                  as T


-- $setup
--
-- For running doctests:
--
-- >>> :set -XOverloadedStrings
-- >>> :set -XOverloadedLists
-- >>> :set -XLambdaCase
-- >>> import qualified Data.Text.IO as T
-- >>> let putPprLn = T.putStrLn . ppr


-- #############################################################################
-- #############################################################################
-- * Preliminaries
-- #############################################################################
-- #############################################################################


-- #############################################################################
-- ** Prettyprinting
-- #############################################################################


-- | A prettyprinter class. Similar to 'Show', but with a focus on having
-- human-readable output as opposed to being valid Haskell.
class Pretty a where
    ppr :: a -> Text


-- #############################################################################
-- ** Names
-- #############################################################################


-- | A 'name' is an identifier in the language we're going to typecheck.
-- Variables on both the term and type level have 'Name's, for example.
newtype Name = Name Text
    deriving (Eq, Ord, Show)

-- | >>> "lorem" :: Name
-- Name "lorem"
instance IsString Name where
    fromString = Name . T.pack

-- | >>> putPprLn (Name "var")
-- var
instance Pretty Name where
    ppr (Name n) = n


-- #############################################################################
-- ** Monotypes
-- #############################################################################


-- | A monotype is an unquantified/unparametric type, in other words it contains
-- no @forall@s. Monotypes are the inner building blocks of all types. Examples
-- of monotypes are @Int@, @a@, @a -> b@.
--
-- In formal notation, 'MType's are often called τ (tau) types.
data MType = TVar Name           -- ^ @a@
           | TFun MType MType    -- ^ @a -> b@
           | TConst Name         -- ^ @Int@, @()@, …

           -- Since we can't declare our own types in our simple type system
           -- here, we'll hard-code certain basic ones so we can typecheck some
           -- familar functions that use them later.
           | TList MType         -- ^ @[a]@
           | TEither MType MType -- ^ @Either a b@
           | TTuple MType MType  -- ^ @(a,b)@
           deriving Show

-- | >>> putPprLn (TFun (TEither (TVar "a") (TVar "b")) (TFun (TVar "c") (TVar "d")))
-- Either a b → c → d
--
-- Using the 'IsString' instance:
--
-- >>> putPprLn (TFun (TEither "a" "b") (TFun "c" "d"))
-- Either a b → c → d
instance Pretty MType where
    ppr = go False
      where
        go _ (TVar name)   = ppr name
        go _ (TList a)     = "[" <> ppr a <> "]"
        go _ (TEither l r) = "Either " <> ppr l <> " " <> ppr r
        go _ (TTuple a b)  = "(" <> ppr a <> ", " <> ppr b <> ")"
        go _ (TConst name) = ppr name
        go parenthesize (TFun a b)
            | parenthesize = "(" <> lhs <> " → " <> rhs <> ")"
            | otherwise    =        lhs <> " → " <> rhs
            where lhs = go True a
                  rhs = go False b

-- | >>> "var" :: MType
-- TVar (Name "var")
instance IsString MType where
    fromString = TVar . fromString


-- | The free variables of an 'MType'. This is simply the collection of all the
-- individual type variables occurring inside of it.
--
-- __Example:__ The free variables of @a -> b@ are @a@ and @b@.
freeMType :: MType -> Set Name
freeMType = \case
    TVar a      -> [a]
    TFun a b    -> freeMType a <> freeMType b
    TList a     -> freeMType a
    TEither l r -> freeMType l <> freeMType r
    TTuple a b  -> freeMType a <> freeMType b
    TConst _    -> []


-- | Substitute all the contained type variables mentioned in the substitution,
-- and leave everything else alone.
instance Substitutable MType where
    applySubst s = \case
        TVar a -> let Subst s' = s
                  in M.findWithDefault (TVar a) a s'
        TFun f x    -> TFun (applySubst s f) (applySubst s x)
        TList a     -> TList (applySubst s a)
        TEither l r -> TEither (applySubst s l) (applySubst s r)
        TTuple a b  -> TTuple (applySubst s a) (applySubst s b)
        c@TConst {} -> c


-- #############################################################################
-- ** Polytypes
-- #############################################################################

-- | A polytype is a monotype universally quantified over a number of type
-- variables. In Haskell, all definitions have polytypes, but since the @forall@
-- is implicit they look a bit like monotypes, maybe confusingly so. For
-- example, the type of @1 :: Int@ is actually @forall <nothing>. Int@, and the
-- type of @id@ is @forall a. a -> a@, although GHC displays it as @a -> a@.
--
-- A polytype claims to work "for all imaginable type parameters", very similar
-- to how a lambda claims to work "for all imaginable value parameters". We can
-- insert a value into a lambda's parameter to evaluate it to a new value, and
-- similarly we'll later insert types into a polytype's quantified variables to
-- gain new types.
--
-- __Example:__ in a definition @id :: forall a. a -> a@, the @a@ after the
-- ∀ ("forall") is the collection of type variables, and @a -> a@ is the 'MType'
-- quantified over. When we have such an @id@, we also have its specialized
-- version @Int -> Int@ available. This process will be the topic of the type
-- inference/unification algorithms.
--
-- In formal notation, 'PType's are often called σ (sigma) types.
--
-- The purpose of having monotypes and polytypes is that we'd like to only have
-- universal quantification at the top level, restricting our language to rank-1
-- polymorphism, where type inferece is total (all types can be inferred) and
-- simple (only a handful of typing rules). Weakening this constraint would be
-- easy: if we allowed universal quantification within function types we would
-- get rank-N polymorphism. Taking it even further to allow it anywhere,
-- effectively replacing all occurrences of 'MType' with 'PType', yields
-- impredicative types. Both these extensions make the type system
-- *significantly* more complex though.
data PType = Forall (Set Name) MType -- ^ ∀{α}. τ

-- | >>> putPprLn (Forall ["a"] (TFun "a" "a"))
-- ∀a. a → a
instance Pretty PType where
    ppr (Forall qs mType) = "∀" <> pprUniversals <> ". " <> ppr mType
      where
        pprUniversals
          | S.null qs = "∅"
          | otherwise = (T.intercalate " " . map ppr . S.toList) qs


-- | The free variables of a 'PType' are the free variables of the contained
-- 'MType', except those universally quantified.
--
-- >>> let sigma = Forall ["a"] (TFun "a" (TFun (TTuple "b" "a") "c"))
-- >>> putPprLn sigma
-- ∀a. a → (b, a) → c
-- >>> let display = T.putStrLn . T.intercalate ", " . foldMap (\x -> [ppr x])
-- >>> display (freePType sigma)
-- b, c
freePType :: PType -> Set Name
freePType (Forall qs mType) = freeMType mType `S.difference` qs


-- | Substitute all the free type variables.
instance Substitutable PType where
    applySubst (Subst subst) (Forall qs mType) =
        let qs' = M.fromSet (const ()) qs
            subst' = Subst (subst `M.difference` qs')
        in Forall qs (applySubst subst' mType)


-- #############################################################################
-- ** The environment
-- #############################################################################


-- | The environment consists of all the values available in scope, and their
-- associated polytypes. Other common names for it include "(typing) context",
-- and because of the commonly used symbol for it sometimes directly
-- \"Gamma"/@"Γ"@.
--
-- There are two kinds of membership in an environment,
--
--   - @∈@: an environment @Γ@ can be viewed as a set of @(value, type)@ pairs,
--     and we can test whether something is /literally contained/ by it via
--     x:σ ∈ Γ
--   - @⊢@, pronounced /entails/, describes all the things that are well-typed,
--     given an environment @Γ@. @Γ ⊢ x:τ@ can thus be seen as a judgement that
--     @x:τ@ is /figuratively contained/ in @Γ@.
--
-- For example, the environment @{x:Int}@ literally contains @x@, but given
-- this, it also entails @λy. x@, @λy z. x@, @let id = λy. y in id x@ and so on.
--
-- In Haskell terms, the environment consists of all the things you currently
-- have available, or that can be built by comining them. If you import the
-- Prelude, your environment entails
--
-- @
-- id            →  ∀a. a→a
-- map           →  ∀a b. (a→b) → [a] → [b]
-- putStrLn      →  ∀∅. String → IO ()
-- …
-- id map        →  ∀a b. (a→b) → [a] → [b]
-- map putStrLn  →  ∀∅. [String] -> [IO ()]
-- …
-- @
newtype Env = Env (Map Name PType)

-- | >>> :{
--    putPprLn (Env
--        [ ("id", Forall ["a"] (TFun "a" "a"))
--        , ("const", Forall ["a", "b"] (TFun "a" (TFun "b" "a"))) ])
-- :}
-- Γ = { const : ∀a b. a → b → a
--     , id : ∀a. a → a }
instance Pretty Env where
    ppr (Env env) = "Γ = { " <> T.intercalate "\n    , " pprBindings <> " }"
      where
        bindings = M.assocs env
        pprBinding (name, pType) = ppr name <> " : " <> ppr pType
        pprBindings = map pprBinding bindings


-- | The free variables of an 'Env'ironment are all the free variables of the
-- 'PType's it contains.
freeEnv :: Env -> Set Name
freeEnv (Env env) = let allPTypes = M.elems env
                    in S.unions (map freePType allPTypes)


-- | Performing a 'Subst'itution in an 'Env'ironment means performing that
-- substituion on all the contained 'PType's.
instance Substitutable Env where
    applySubst s (Env env) = Env (M.map (applySubst s) env)


-- #############################################################################
-- ** Substitutions
-- #############################################################################


-- | A substitution is a mapping from type variables to 'MType's. Applying a
-- substitution means applying those replacements. For example, the substitution
-- @a -> Int@ applied to @a -> a@ yields the result @Int -> Int@.
--
-- A key concept behind Hindley-Milner is that once we dive deeper into an
-- expression, we learn more about our type variables. We might learn that  @a@
-- has to be specialized to @b -> b@, and then later on that @b@ is actually
-- @Int@. Substitutions are an organized way of carrying this information along.
newtype Subst = Subst (Map Name MType)


-- | We're going to apply substitutions to a variety of other values that
-- somehow contain type variables, so we overload this application operation in
-- a class here.
--
-- Laws:
--
-- @
-- 'applySubst' 'mempty' ≡ 'id'
-- 'applySubst' (s1 '<>' s2) ≡ 'applySubst' s1 . 'applySubst' s2
-- @
class Substitutable a where
    applySubst :: Subst -> a -> a

instance (Substitutable a, Substitutable b) => Substitutable (a,b) where
    applySubst s (x,y) = (applySubst s x, applySubst s y)

-- | @'applySubst' s1 s2@ applies one substitution to another, replacing all the
-- bindings in the second argument @s2@ with their values mentioned in the first
-- one (@s1@).
instance Substitutable Subst where
    applySubst s (Subst target) = Subst (fmap (applySubst s) target)

-- | >>> :{
--    putPprLn (Subst
--        [ ("a", TFun "b" "b")
--        , ("b", TEither "c" "d") ])
-- :}
-- { a ––> b → b
-- , b ––> Either c d }
instance Pretty Subst where
    ppr (Subst s) = "{ " <> T.intercalate "\n, " [ ppr k <> " ––> " <> ppr v | (k,v) <- M.toList s ] <> " }"

-- | Combine two substitutions by applying all substitutions mentioned in the
-- first argument to the type variables contained in the second.
instance Monoid Subst where
    -- Considering that all we can really do with a substitution is apply it, we
    -- can use the one of 'Substitutable's laws to show that substitutions
    -- combine associatively,
    --
    -- @
    --   applySubst (compose s1 (compose s2 s3))
    -- = applySubst s1 . applySubst (compose s2 s3)
    -- = applySubst s1 . applySubst s2 . applySubst s3
    -- = applySubst (compose s1 s2) . applySubst s3
    -- = applySubst (compose (compose s1 s2) s3)
    -- @
    mappend subst1 subst2 = Subst (s1 `M.union` s2)
      where
        Subst s1 = subst1
        Subst s2 = applySubst subst1 subst2

    mempty = Subst M.empty


-- #############################################################################
-- #############################################################################
-- * Typechecking
-- #############################################################################
-- #############################################################################

-- $ Typechecking does two things:
--
-- 1. If two types are not immediately identical, attempt to 'unify' them
--    to get a type compatible with both of them
-- 2. 'infer' the most general type of a value by comparing the values in its
--    definition with the 'Env'ironment


-- #############################################################################
-- ** Inference context
-- #############################################################################


-- | The inference type holds a supply of unique names, and can fail with a
-- descriptive error if something goes wrong.
--
-- /Invariant:/ the supply must be infinite, or we might run out of names to
-- give to things.
newtype Infer a = Infer (ExceptT InferError (State [Name]) a)
    deriving (Functor, Applicative, Monad)

-- | Errors that can happen during the type inference process.
data InferError =
    -- | Two types that don't match were attempted to be unified.
    --
    -- For example, @a -> a@ and @Int@ do not unify.
    --
    -- >>> putPprLn (CannotUnify (TFun "a" "a") (TConst "Int"))
    -- Cannot unify a → a with Int
      CannotUnify MType MType

    -- | A 'TVar' is bound to an 'MType' that already contains it.
    --
    -- The canonical example of this is @λx. x x@, where the first @x@
    -- in the body has to have type @a -> b@, and the second one @a@. Since
    -- they're both the same @x@, this requires unification of @a@ with
    -- @a -> b@, which only works if @a = a -> b = (a -> b) -> b = …@, yielding
    -- an infinite type.
    --
    -- >>> putPprLn (OccursCheckFailed "a" (TFun "a" "a"))
    -- Occurs check failed: a already appears in a → a
    | OccursCheckFailed Name MType

    -- | The value of an unknown identifier was read.
    --
    -- >>> putPprLn (UnknownIdentifier "a")
    -- Unknown identifier: a
    | UnknownIdentifier Name
    deriving Show

-- | >>> putPprLn (CannotUnify (TEither "a" "b") (TTuple "a" "b"))
-- Cannot unify Either a b with (a, b)
instance Pretty InferError where
    ppr = \case
        CannotUnify t1 t2 ->
            "Cannot unify " <> ppr t1 <> " with " <> ppr t2
        OccursCheckFailed name ty ->
            "Occurs check failed: " <> ppr name <> " already appears in " <> ppr ty
        UnknownIdentifier name ->
            "Unknown identifier: " <> ppr name


-- | Evaluate a value in an 'Infer'ence context.
--
-- >>> let expr = EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x"))))
-- >>> putPprLn expr
-- λf g x. f x (g x)
-- >>> let inferred = runInfer (infer (Env []) expr)
-- >>> let demonstrate = \case Right (_, ty) -> T.putStrLn (":: " <> ppr ty)
-- >>> demonstrate inferred
-- :: (c → e → f) → (c → e) → c → f
runInfer :: Infer a -- ^ Inference data
         -> Either InferError a
runInfer (Infer inf) =
    evalState (runExceptT inf) (map Name (infiniteSupply alphabet))
  where

    alphabet = map T.singleton ['a'..'z']

    -- [a, b, c] ==> [a,b,c, a1,b1,c1, a2,b2,c2, …]
    infiniteSupply supply = supply <> addSuffixes supply (1 :: Integer)
      where
        addSuffixes xs n = map (\x -> addSuffix x n) xs <> addSuffixes xs (n+1)
        addSuffix x n = x <> T.pack (show n)


-- | Throw an 'InferError' in an 'Infer'ence context.
--
-- >>> case runInfer (throw (UnknownIdentifier "var")) of Left err -> putPprLn err
-- Unknown identifier: var
throw :: InferError -> Infer a
throw = Infer . throwE


-- #############################################################################
-- ** Unification
-- #############################################################################

-- $ Unification describes the process of making two different types compatible
-- by specializing them where needed. A desirable property to have here is being
-- able to find the most general unifier. Luckily, we'll be able to do that in
-- our type system.


-- | The unification of two 'MType's is the most general substituion that can be
-- applied to both of them in order to yield the same result.
--
-- >>> let m1 = TFun "a" "b"
-- >>> putPprLn m1
-- a → b
-- >>> let m2 = TFun "c" (TEither "d" "e")
-- >>> putPprLn m2
-- c → Either d e
-- >>> let inferSubst = unify (m1, m2)
-- >>> case runInfer inferSubst of Right subst -> putPprLn subst
-- { a ––> c
-- , b ––> Either d e }
unify :: (MType, MType) -> Infer Subst
unify = \case
    (TFun a b,    TFun x y)          -> unifyBinary (a,b) (x,y)
    (TVar v,      x)                 -> v `bindVariableTo` x
    (x,           TVar v)            -> v `bindVariableTo` x
    (TConst a,    TConst b) | a == b -> pure mempty
    (TList a,     TList b)           -> unify (a,b)
    (TEither a b, TEither x y)       -> unifyBinary (a,b) (x,y)
    (TTuple a b,  TTuple x y)        -> unifyBinary (a,b) (x,y)
    (a, b)                           -> throw (CannotUnify a b)

  where

    -- Unification of binary type constructors, such as functions and Either.
    -- Unification is first done for the first operand, and assuming the
    -- required substitution, for the second one.
    unifyBinary :: (MType, MType) -> (MType, MType) -> Infer Subst
    unifyBinary (a,b) (x,y) = do
        s1 <- unify (a, x)
        s2 <- unify (applySubst s1 (b, y))
        pure (s1 <> s2)


-- | Build a 'Subst'itution that binds a 'Name' of a 'TVar' to an 'MType'. The
-- resulting substitution should be idempotent, i.e. applying it more than once
-- to something should not be any different from applying it only once.
--
-- - In the simplest case, this just means building a substitution that just
--   does that.
-- - Substituting a 'Name' with a 'TVar' with the same name unifies a type
--   variable with itself, and the resulting substitution does nothing new.
-- - If the 'Name' we're trying to bind to an 'MType' already occurs in that
--   'MType', the resulting substitution would not be idempotent: the 'MType'
--   would be replaced again, yielding a different result. This is known as the
--   Occurs Check.
bindVariableTo :: Name -> MType -> Infer Subst

bindVariableTo name (TVar v) | boundToSelf = pure mempty
  where
    boundToSelf = name == v

bindVariableTo name mType | name `occursIn` mType = throw (OccursCheckFailed name mType)
  where
    n `occursIn` ty = n `S.member` freeMType ty

bindVariableTo name mType = pure (Subst (M.singleton name mType))


-- #############################################################################
-- ** Type inference
-- #############################################################################

-- $ Type inference is the act of finding out a value's type by looking at the
-- environment it is in, in order to make it compatible with it.
--
-- In literature, the Hindley-Damas-Milner inference algorithm ("Algorithm W")
-- is often presented in the style of logical formulas, and below you'll find
-- that version along with code that actually does what they say.
--
-- These formulas look a bit like fractions, where the "numerator" is a
-- collection of premises, and the denominator is the consequence if all of them
-- hold.
--
-- __Example:__
--
-- @
-- Γ ⊢ even : Int → Bool   Γ ⊢ 1 : Int
-- –––––––––––––––––––––––––––––––––––
--          Γ ⊢ even 1 : Bool
-- @
--
-- means that if we have a value of type @Int -> Bool@ called "even" and a value
-- of type @Int@ called @1@, then we also have a value of type @Bool@ via
-- @even 1@ available to us.
--
-- The actual inference rules are polymorphic versions of this example, and
-- the code comments will explain each step in detail.


-- -----------------------------------------------------------------------------
-- *** The language: typed lambda calculus
-- -----------------------------------------------------------------------------


-- | The syntax tree of the language we'd like to typecheck. You can view it as
-- a close relative to simply typed lambda calculus, having only the most
-- necessary syntax elements.
--
-- Since 'ELet' is non-recursive, the usual fixed-point function
-- @fix : (a → a) → a@ can be introduced to allow recursive definitions.
data Exp = ELit Lit          -- ^ True, 1
         | EVar Name         -- ^ @x@
         | EApp Exp Exp      -- ^ @f x@
         | EAbs Name Exp     -- ^ @λx. e@
         | ELet Name Exp Exp -- ^ @let x = e in e'@ (non-recursive)
         deriving Show


-- | Literals we'd like to support. Since we can't define new data types in our
-- simple type system, we'll have to hard-code the possible ones here.
data Lit = LBool Bool
         | LInteger Integer
         deriving Show


-- | >>> putPprLn (EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x")))))
-- λf g x. f x (g x)
instance Pretty Exp where
    ppr (ELit lit) = ppr lit

    ppr (EVar name) = ppr name

    ppr (EApp f x) = pprApp1 f <> " " <> pprApp2 x
      where
        pprApp1 = \case
            eLet@ELet{} -> "(" <> ppr eLet <> ")"
            eLet@EAbs{} -> "(" <> ppr eLet <> ")"
            e -> ppr e
        pprApp2 = \case
            eApp@EApp{} -> "(" <> ppr eApp <> ")"
            e -> pprApp1 e

    ppr x@EAbs{} = pprAbs True x
      where
        pprAbs True  (EAbs name expr) = "λ" <> ppr name <> pprAbs False expr
        pprAbs False (EAbs name expr) = " " <> ppr name <> pprAbs False expr
        pprAbs _ expr                 = ". " <> ppr expr

    ppr (ELet name value body) =
        "let " <> ppr name <> " = " <> ppr value <> " in " <> ppr body

-- | >>> putPprLn (LBool True)
-- True
--
-- >>> putPprLn (LInteger 127)
-- 127
instance Pretty Lit where
    ppr = \case
        LBool    b -> showT b
        LInteger i -> showT i
      where
        showT :: Show a => a -> Text
        showT = T.pack . show

-- | >>> "var" :: Exp
-- EVar (Name "var")
instance IsString Exp where
    fromString = EVar . fromString


-- -----------------------------------------------------------------------------
-- *** Some useful definitions
-- -----------------------------------------------------------------------------


-- | Generate a fresh 'Name' in a type 'Infer'ence context. An example use case
-- of this is η expansion, which transforms @f@ into @λx. f x@, where "x" is a
-- new name, i.e. unbound in the current context.
fresh :: Infer MType
fresh = drawFromSupply >>= \case
    Right name -> pure (TVar name)
    Left err -> throw err

  where

    drawFromSupply :: Infer (Either InferError Name)
    drawFromSupply = Infer (do
        s:upply <- lift get
        lift (put upply)
        pure (Right s) )


-- | Add a new binding to the environment.
--
-- The Haskell equivalent would be defining a new value, for example in module
-- scope or in a @let@ block. This corresponds to the "comma" operation used in
-- formal notation,
--
-- @
-- Γ, x:σ  ≡  extendEnv Γ (x,σ)
-- @
extendEnv :: Env -> (Name, PType) -> Env
extendEnv (Env env) (name, pType) = Env (M.insert name pType env)


-- -----------------------------------------------------------------------------
-- *** Inferring the types of all language constructs
-- -----------------------------------------------------------------------------


-- | Infer the type of an 'Exp'ression in an 'Env'ironment, resulting in the
-- 'Exp's 'MType' along with a substitution that has to be done in order to reach
-- this goal.
--
-- This is widely known as /Algorithm W/.
infer :: Env -> Exp -> Infer (Subst, MType)
infer env = \case
    ELit lit    -> inferLit lit
    EVar name   -> inferVar env name
    EApp f x    -> inferApp env f x
    EAbs x e    -> inferAbs env x e
    ELet x e e' -> inferLet env x e e'


-- | Literals such as 'True' and '1' have their types hard-coded.
inferLit :: Lit -> Infer (Subst, MType)
inferLit lit = pure (mempty, TConst litTy)
  where
    litTy = case lit of
        LBool    {} -> "Bool"
        LInteger {} -> "Integer"


-- | Inferring the type of a variable is done via
--
-- @
-- x:σ ∈ Γ   τ = instantiate(σ)
-- ––––––––––––––––––––––––––––  [Var]
--            Γ ⊢ x:τ
-- @
--
-- This means that if @Γ@ /literally contains/ (@∈@) a value, then it also
-- /entails it/ (@⊢@) in all its instantiations.
inferVar :: Env -> Name -> Infer (Subst, MType)
inferVar env name = do
    sigma <- lookupEnv env name -- x:σ ∈ Γ
    tau <- instantiate sigma    -- τ = instantiate(σ)
                                -- ------------------
    pure (mempty, tau)          -- Γ ⊢ x:τ


-- | Look up the 'PType' of a 'Name' in the 'Env'ironment.
--
-- This checks whether @x:σ@ is /literally contained/ in @Γ@. For more details
-- about this, see the documentation of 'Env'.
--
-- To give a Haskell analogon, looking up @id@ when @Prelude@ is loaded, the
-- resulting 'PType' would be @id@'s type, namely @forall a. a -> a@.
lookupEnv :: Env -> Name -> Infer PType
lookupEnv (Env env) name = case M.lookup name env of
    Just x  -> pure x
    Nothing -> throw (UnknownIdentifier name)


-- | Bind all quantified variables of a 'PType' to 'fresh' type variables.
--
-- __Example:__ instantiating @forall a. a -> b -> a@ results in the 'MType'
-- @c -> b -> c@, where @c@ is a fresh name (to avoid shadowing issues).
--
-- You can picture the 'PType' to be the prototype converted to an instantiated
-- 'MType', which can now be used in the unification process.
--
-- Another way of looking at it is by simply forgetting which variables were
-- quantified, carefully avoiding name clashes when doing so.
--
-- 'instantiate' can also be seen as the opposite of 'generalize', which we'll
-- need later to convert an 'MType' to a 'PType'.
instantiate :: PType -> Infer MType
instantiate (Forall qs t) = do
    subst <- substituteAllWithFresh qs
    pure (applySubst subst t)

  where
    -- For each given name, add a substitution from that name to a fresh type
    -- variable to the result.
    substituteAllWithFresh :: Set Name -> Infer Subst
    substituteAllWithFresh xs = do
        let freshSubstActions = M.fromSet (const fresh) xs
        freshSubsts <- sequenceA freshSubstActions
        pure (Subst freshSubsts)


-- | Function application captures the fact that if we have a function and an
-- argument we can give to that function, we also have the result value of the
-- result type available to us.
--
-- @
-- Γ ⊢ f : fτ   Γ ⊢ x : xτ   fxτ = fresh   unify(fτ, xτ → fxτ)
-- –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––  [App]
--                       Γ ⊢ f x : fxτ
-- @
--
-- This rule says that given a function and a value with a type, the function
-- type has to unify with a function type that allows the value type to be its
-- argument.
inferApp
    :: Env
    -> Exp -- ^ __f__ x
    -> Exp -- ^ f __x__
    -> Infer (Subst, MType)
inferApp env f x = do
    (s1, fTau) <- infer env f                         -- f : fτ
    (s2, xTau) <- infer (applySubst s1 env) x         -- x : xτ
    fxTau <- fresh                                    -- fxτ = fresh
    s3 <- unify (applySubst s2 fTau, TFun xTau fxTau) -- unify (fτ, xτ → fxτ)
    let s = s3 <> s2 <> s1                            -- --------------------
    pure (s, applySubst s3 fxTau)                     -- f x : fxτ


-- | Lambda abstraction is based on the fact that when we introduce a new
-- variable, the resulting lambda maps from that variable's type to the type of
-- the body.
--
-- @
-- τ = fresh   σ = ∀∅. τ   Γ, x:σ ⊢ e:τ'
-- –––––––––––––––––––––––––––––––––––––  [Abs]
--             Γ ⊢ λx.e : τ→τ'
-- @
--
-- Here, @Γ, x:τ@ is @Γ@ extended by one additional mapping, namely @x:τ@.
--
-- Abstraction is typed by extending the environment by a new 'MType', and if
-- under this assumption we can construct a function mapping to a value of that
-- type, we can say that the lambda takes a value and maps to it.
inferAbs
    :: Env
    -> Name -- ^ λ__x__. e
    -> Exp  -- ^ λx. __e__
    -> Infer (Subst, MType)
inferAbs env x e = do
    tau <- fresh                           -- τ = fresh
    let sigma = Forall [] tau              -- σ = ∀∅. τ
        env' = extendEnv env (x, sigma)    -- Γ, x:σ …
    (s, tau') <- infer env' e              --        … ⊢ e:τ'
                                           -- ---------------
    pure (s, TFun (applySubst s tau) tau') -- λx.e : τ→τ'


-- | A let binding allows extending the environment with new bindings in a
-- principled manner. To do this, we first have to typecheck the expression to
-- be introduced. The result of this is then generalized to a 'PType', since let
-- bindings introduce new polymorphic values, which are then added to the
-- environment. Now we can finally typecheck the body of the "in" part of the
-- let binding.
--
-- Note that in our simple language, let is non-recursive, but recursion can be
-- introduced as usual by adding a primitive @fix : (a → a) → a@ if desired.
--
-- @
-- Γ ⊢ e:τ   σ = gen(Γ,τ)   Γ, x:σ ⊢ e':τ'
-- –––––––––––––––––––––––––––––––––––––––  [Let]
--         Γ ⊢ let x = e in e' : τ'
-- @
inferLet
    :: Env
    -> Name -- ^ let __x__ = e in e'
    -> Exp -- ^ let x = __e__ in e'
    -> Exp -- ^ let x = e in __e'__
    -> Infer (Subst, MType)
inferLet env x e e' = do
    (s1, tau) <- infer env e              -- Γ ⊢ e:τ
    let env' = applySubst s1 env
    let sigma = generalize env' tau       -- σ = gen(Γ,τ)
    let env'' = extendEnv env' (x, sigma) -- Γ, x:σ
    (s2, tau') <- infer env'' e'          -- Γ ⊢ …
                                          -- --------------------------
    pure (s2 <> s1, tau')                 --     … let x = e in e' : τ'


-- | Generalize an 'MType' to a 'PType' by universally quantifying over all the
-- type variables contained in it, except those already free in the environment.
--
-- >>> let tau = TFun "a" (TFun "b" "a")
-- >>> putPprLn tau
-- a → b → a
-- >>> putPprLn (generalize (Env [("x", Forall [] "b")]) tau)
-- ∀a. a → b → a
--
-- In more formal notation,
--
-- @
-- gen(Γ,τ) = ∀{α}. τ
--     where {α} = free(τ) – free(Γ)
-- @
--
-- 'generalize' can also be seen as the opposite of 'instantiate', which
-- converts a 'PType' to an 'MType'.
generalize :: Env -> MType -> PType
generalize env mType = Forall qs mType
  where
    qs = freeMType mType `S.difference` freeEnv env