diff --git a/HindleyMilner.hs b/HindleyMilner.hs new file mode 100644 index 0000000..53d1b58 --- /dev/null +++ b/HindleyMilner.hs @@ -0,0 +1,920 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE OverloadedStrings #-} + + + +-- | This module is an extensively documented walkthrough for typechecking a +-- basic functional language using the Hindley-Damas-Milner algorithm. +-- +-- In the end, we'll be able to infer the type of expressions like +-- +-- @ +-- find (λx. (>) x 0) +-- :: [Integer] -> Either () Integer +-- @ +-- +-- It can be used in multiple different forms: +-- +-- * The source is written in literate programming style, so you can almost +-- read it from top to bottom, minus some few references to later topics. +-- * /Loads/ of doctests (runnable and verified code examples) are included +-- * The code is runnable in GHCi, all definitions are exposed. +-- * A small main module that gives many examples of what you might try out in +-- GHCi is also included. +-- * The Haddock output yields a nice overview over the definitions given, with +-- a nice rendering of a truckload of Haddock comments. + +module HindleyMilner where + +import Control.Monad.Trans +import Control.Monad.Trans.Except +import Control.Monad.Trans.State +import Data.Map (Map) +import qualified Data.Map as M +import Data.Monoid +import Data.Set (Set) +import qualified Data.Set as S +import Data.String +import Data.Text (Text) +import qualified Data.Text as T + + + +-- $setup +-- +-- For running doctests: +-- +-- >>> :set -XOverloadedStrings +-- >>> :set -XOverloadedLists +-- >>> :set -XLambdaCase +-- >>> import qualified Data.Text.IO as T +-- >>> let putPprLn = T.putStrLn . ppr + + + +-- ############################################################################# +-- ############################################################################# +-- * Preliminaries +-- ############################################################################# +-- ############################################################################# + + + +-- ############################################################################# +-- ** Prettyprinting +-- ############################################################################# + + + +-- | A prettyprinter class. Similar to 'Show', but with a focus on having +-- human-readable output as opposed to being valid Haskell. +class Pretty a where + ppr :: a -> Text + + + +-- ############################################################################# +-- ** Names +-- ############################################################################# + + + +-- | A 'name' is an identifier in the language we're going to typecheck. +-- Variables on both the term and type level have 'Name's, for example. +newtype Name = Name Text + deriving (Eq, Ord, Show) + +-- | >>> "lorem" :: Name +-- Name "lorem" +instance IsString Name where + fromString = Name . T.pack + +-- | >>> putPprLn (Name "var") +-- var +instance Pretty Name where + ppr (Name n) = n + + + +-- ############################################################################# +-- ** Monotypes +-- ############################################################################# + + + +-- | A monotype is an unquantified/unparametric type, in other words it contains +-- no @forall@s. Monotypes are the inner building blocks of all types. Examples +-- of monotypes are @Int@, @a@, @a -> b@. +-- +-- In formal notation, 'MType's are often called τ (tau) types. +data MType = TVar Name -- ^ @a@ + | TFun MType MType -- ^ @a -> b@ + | TConst Name -- ^ @Int@, @()@, … + + -- Since we can't declare our own types in our simple type system + -- here, we'll hard-code certain basic ones so we can typecheck some + -- familar functions that use them later. + | TList MType -- ^ @[a]@ + | TEither MType MType -- ^ @Either a b@ + | TTuple MType MType -- ^ @(a,b)@ + deriving Show + +-- | >>> putPprLn (TFun (TEither (TVar "a") (TVar "b")) (TFun (TVar "c") (TVar "d"))) +-- Either a b → c → d +-- +-- Using the 'IsString' instance: +-- +-- >>> putPprLn (TFun (TEither "a" "b") (TFun "c" "d")) +-- Either a b → c → d +instance Pretty MType where + ppr = go False + where + go _ (TVar name) = ppr name + go _ (TList a) = "[" <> ppr a <> "]" + go _ (TEither l r) = "Either " <> ppr l <> " " <> ppr r + go _ (TTuple a b) = "(" <> ppr a <> ", " <> ppr b <> ")" + go _ (TConst name) = ppr name + go parenthesize (TFun a b) + | parenthesize = "(" <> lhs <> " → " <> rhs <> ")" + | otherwise = lhs <> " → " <> rhs + where lhs = go True a + rhs = go False b + +-- | >>> "var" :: MType +-- TVar (Name "var") +instance IsString MType where + fromString = TVar . fromString + + + +-- | The free variables of an 'MType'. This is simply the collection of all the +-- individual type variables occurring inside of it. +-- +-- __Example:__ The free variables of @a -> b@ are @a@ and @b@. +freeMType :: MType -> Set Name +freeMType = \case + TVar a -> [a] + TFun a b -> freeMType a <> freeMType b + TList a -> freeMType a + TEither l r -> freeMType l <> freeMType r + TTuple a b -> freeMType a <> freeMType b + TConst _ -> [] + + + +-- | Substitute all the contained type variables mentioned in the substitution, +-- and leave everything else alone. +instance Substitutable MType where + applySubst s = \case + TVar a -> let Subst s' = s + in M.findWithDefault (TVar a) a s' + TFun f x -> TFun (applySubst s f) (applySubst s x) + TList a -> TList (applySubst s a) + TEither l r -> TEither (applySubst s l) (applySubst s r) + TTuple a b -> TTuple (applySubst s a) (applySubst s b) + c@TConst {} -> c + + + +-- ############################################################################# +-- ** Polytypes +-- ############################################################################# + +-- | A polytype is a monotype universally quantified over a number of type +-- variables. In Haskell, all definitions have polytypes, but since the @forall@ +-- is implicit they look a bit like monotypes, maybe confusingly so. For +-- example, the type of @1 :: Int@ is actually @forall . Int@, and the +-- type of @id@ is @forall a. a -> a@, although GHC displays it as @a -> a@. +-- +-- A polytype claims to work "for all imaginable type parameters", very similar +-- to how a lambda claims to work "for all imaginable value parameters". We can +-- insert a value into a lambda's parameter to evaluate it to a new value, and +-- similarly we'll later insert types into a polytype's quantified variables to +-- gain new types. +-- +-- __Example:__ in a definition @id :: forall a. a -> a@, the @a@ after the +-- ∀ ("forall") is the collection of type variables, and @a -> a@ is the 'MType' +-- quantified over. When we have such an @id@, we also have its specialized +-- version @Int -> Int@ available. This process will be the topic of the type +-- inference/unification algorithms. +-- +-- In formal notation, 'PType's are often called σ (sigma) types. +-- +-- The purpose of having monotypes and polytypes is that we'd like to only have +-- universal quantification at the top level, restricting our language to rank-1 +-- polymorphism, where type inferece is total (all types can be inferred) and +-- simple (only a handful of typing rules). Weakening this constraint would be +-- easy: if we allowed universal quantification within function types we would +-- get rank-N polymorphism. Taking it even further to allow it anywhere, +-- effectively replacing all occurrences of 'MType' with 'PType', yields +-- impredicative types. Both these extensions make the type system +-- *significantly* more complex though. +data PType = Forall (Set Name) MType -- ^ ∀{α}. τ + +-- | >>> putPprLn (Forall ["a"] (TFun "a" "a")) +-- ∀a. a → a +instance Pretty PType where + ppr (Forall qs mType) = "∀" <> pprUniversals <> ". " <> ppr mType + where + pprUniversals + | S.null qs = "∅" + | otherwise = (T.intercalate " " . map ppr . S.toList) qs + + + +-- | The free variables of a 'PType' are the free variables of the contained +-- 'MType', except those universally quantified. +-- +-- >>> let sigma = Forall ["a"] (TFun "a" (TFun (TTuple "b" "a") "c")) +-- >>> putPprLn sigma +-- ∀a. a → (b, a) → c +-- >>> let display = T.putStrLn . T.intercalate ", " . foldMap (\x -> [ppr x]) +-- >>> display (freePType sigma) +-- b, c +freePType :: PType -> Set Name +freePType (Forall qs mType) = freeMType mType `S.difference` qs + + + +-- | Substitute all the free type variables. +instance Substitutable PType where + applySubst (Subst subst) (Forall qs mType) = + let qs' = M.fromSet (const ()) qs + subst' = Subst (subst `M.difference` qs') + in Forall qs (applySubst subst' mType) + + + +-- ############################################################################# +-- ** The environment +-- ############################################################################# + + + +-- | The environment consists of all the values available in scope, and their +-- associated polytypes. Other common names for it include "(typing) context", +-- and because of the commonly used symbol for it sometimes directly +-- \"Gamma"/@"Γ"@. +-- +-- There are two kinds of membership in an environment, +-- +-- - @∈@: an environment @Γ@ can be viewed as a set of @(value, type)@ pairs, +-- and we can test whether something is /literally contained/ by it via +-- x:σ ∈ Γ +-- - @⊢@, pronounced /entails/, describes all the things that are well-typed, +-- given an environment @Γ@. @Γ ⊢ x:τ@ can thus be seen as a judgement that +-- @x:τ@ is /figuratively contained/ in @Γ@. +-- +-- For example, the environment @{x:Int}@ literally contains @x@, but given +-- this, it also entails @λy. x@, @λy z. x@, @let id = λy. y in id x@ and so on. +-- +-- In Haskell terms, the environment consists of all the things you currently +-- have available, or that can be built by comining them. If you import the +-- Prelude, your environment entails +-- +-- @ +-- id → ∀a. a→a +-- map → ∀a b. (a→b) → [a] → [b] +-- putStrLn → ∀∅. String → IO () +-- … +-- id map → ∀a b. (a→b) → [a] → [b] +-- map putStrLn → ∀∅. [String] -> [IO ()] +-- … +-- @ +newtype Env = Env (Map Name PType) + +-- | >>> :{ +-- putPprLn (Env +-- [ ("id", Forall ["a"] (TFun "a" "a")) +-- , ("const", Forall ["a", "b"] (TFun "a" (TFun "b" "a"))) ]) +-- :} +-- Γ = { const : ∀a b. a → b → a +-- , id : ∀a. a → a } +instance Pretty Env where + ppr (Env env) = "Γ = { " <> T.intercalate "\n , " pprBindings <> " }" + where + bindings = M.assocs env + pprBinding (name, pType) = ppr name <> " : " <> ppr pType + pprBindings = map pprBinding bindings + + + +-- | The free variables of an 'Env'ironment are all the free variables of the +-- 'PType's it contains. +freeEnv :: Env -> Set Name +freeEnv (Env env) = let allPTypes = M.elems env + in S.unions (map freePType allPTypes) + + + +-- | Performing a 'Subst'itution in an 'Env'ironment means performing that +-- substituion on all the contained 'PType's. +instance Substitutable Env where + applySubst s (Env env) = Env (M.map (applySubst s) env) + + + +-- ############################################################################# +-- ** Substitutions +-- ############################################################################# + + + +-- | A substitution is a mapping from type variables to 'MType's. Applying a +-- substitution means applying those replacements. For example, the substitution +-- @a -> Int@ applied to @a -> a@ yields the result @Int -> Int@. +-- +-- A key concept behind Hindley-Milner is that once we dive deeper into an +-- expression, we learn more about our type variables. We might learn that @a@ +-- has to be specialized to @b -> b@, and then later on that @b@ is actually +-- @Int@. Substitutions are an organized way of carrying this information along. +newtype Subst = Subst (Map Name MType) + + + +-- | We're going to apply substitutions to a variety of other values that +-- somehow contain type variables, so we overload this application operation in +-- a class here. +-- +-- Laws: +-- +-- @ +-- 'applySubst' 'mempty' ≡ 'id' +-- 'applySubst' (s1 '<>' s2) ≡ 'applySubst' s1 . 'applySubst' s2 +-- @ +class Substitutable a where + applySubst :: Subst -> a -> a + +instance (Substitutable a, Substitutable b) => Substitutable (a,b) where + applySubst s (x,y) = (applySubst s x, applySubst s y) + +-- | @'applySubst' s1 s2@ applies one substitution to another, replacing all the +-- bindings in the second argument @s2@ with their values mentioned in the first +-- one (@s1@). +instance Substitutable Subst where + applySubst s (Subst target) = Subst (fmap (applySubst s) target) + +-- | >>> :{ +-- putPprLn (Subst +-- [ ("a", TFun "b" "b") +-- , ("b", TEither "c" "d") ]) +-- :} +-- { a ––> b → b +-- , b ––> Either c d } +instance Pretty Subst where + ppr (Subst s) = "{ " <> T.intercalate "\n, " [ ppr k <> " ––> " <> ppr v | (k,v) <- M.toList s ] <> " }" + +-- | Combine two substitutions by applying all substitutions mentioned in the +-- first argument to the type variables contained in the second. +instance Monoid Subst where + -- Considering that all we can really do with a substitution is apply it, we + -- can use the one of 'Substitutable's laws to show that substitutions + -- combine associatively, + -- + -- @ + -- applySubst (compose s1 (compose s2 s3)) + -- = applySubst s1 . applySubst (compose s2 s3) + -- = applySubst s1 . applySubst s2 . applySubst s3 + -- = applySubst (compose s1 s2) . applySubst s3 + -- = applySubst (compose (compose s1 s2) s3) + -- @ + mappend subst1 subst2 = Subst (s1 `M.union` s2) + where + Subst s1 = subst1 + Subst s2 = applySubst subst1 subst2 + + mempty = Subst M.empty + + + +-- ############################################################################# +-- ############################################################################# +-- * Typechecking +-- ############################################################################# +-- ############################################################################# + +-- $ Typechecking does two things: +-- +-- 1. If two types are not immediately identical, attempt to 'unify' them +-- to get a type compatible with both of them +-- 2. 'infer' the most general type of a value by comparing the values in its +-- definition with the 'Env'ironment + + + +-- ############################################################################# +-- ** Inference context +-- ############################################################################# + + + +-- | The inference type holds a supply of unique names, and can fail with a +-- descriptive error if something goes wrong. +-- +-- /Invariant:/ the supply must be infinite, or we might run out of names to +-- give to things. +newtype Infer a = Infer (ExceptT InferError (State [Name]) a) + deriving (Functor, Applicative, Monad) + +-- | Errors that can happen during the type inference process. +data InferError = + -- | Two types that don't match were attempted to be unified. + -- + -- For example, @a -> a@ and @Int@ do not unify. + -- + -- >>> putPprLn (CannotUnify (TFun "a" "a") (TConst "Int")) + -- Cannot unify a → a with Int + CannotUnify MType MType + + -- | A 'TVar' is bound to an 'MType' that already contains it. + -- + -- The canonical example of this is @λx. x x@, where the first @x@ + -- in the body has to have type @a -> b@, and the second one @a@. Since + -- they're both the same @x@, this requires unification of @a@ with + -- @a -> b@, which only works if @a = a -> b = (a -> b) -> b = …@, yielding + -- an infinite type. + -- + -- >>> putPprLn (OccursCheckFailed "a" (TFun "a" "a")) + -- Occurs check failed: a already appears in a → a + | OccursCheckFailed Name MType + + -- | The value of an unknown identifier was read. + -- + -- >>> putPprLn (UnknownIdentifier "a") + -- Unknown identifier: a + | UnknownIdentifier Name + deriving Show + +-- | >>> putPprLn (CannotUnify (TEither "a" "b") (TTuple "a" "b")) +-- Cannot unify Either a b with (a, b) +instance Pretty InferError where + ppr = \case + CannotUnify t1 t2 -> + "Cannot unify " <> ppr t1 <> " with " <> ppr t2 + OccursCheckFailed name ty -> + "Occurs check failed: " <> ppr name <> " already appears in " <> ppr ty + UnknownIdentifier name -> + "Unknown identifier: " <> ppr name + + + +-- | Evaluate a value in an 'Infer'ence context. +-- +-- >>> let expr = EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x")))) +-- >>> putPprLn expr +-- λf g x. f x (g x) +-- >>> let inferred = runInfer (infer (Env []) expr) +-- >>> let demonstrate = \case Right (_, ty) -> T.putStrLn (":: " <> ppr ty) +-- >>> demonstrate inferred +-- :: (c → e → f) → (c → e) → c → f +runInfer :: Infer a -- ^ Inference data + -> Either InferError a +runInfer (Infer inf) = + evalState (runExceptT inf) (map Name (infiniteSupply alphabet)) + where + + alphabet = map T.singleton ['a'..'z'] + + -- [a, b, c] ==> [a,b,c, a1,b1,c1, a2,b2,c2, …] + infiniteSupply supply = supply <> addSuffixes supply (1 :: Integer) + where + addSuffixes xs n = map (\x -> addSuffix x n) xs <> addSuffixes xs (n+1) + addSuffix x n = x <> T.pack (show n) + + + +-- | Throw an 'InferError' in an 'Infer'ence context. +-- +-- >>> case runInfer (throw (UnknownIdentifier "var")) of Left err -> putPprLn err +-- Unknown identifier: var +throw :: InferError -> Infer a +throw = Infer . throwE + + + +-- ############################################################################# +-- ** Unification +-- ############################################################################# + +-- $ Unification describes the process of making two different types compatible +-- by specializing them where needed. A desirable property to have here is being +-- able to find the most general unifier. Luckily, we'll be able to do that in +-- our type system. + + + +-- | The unification of two 'MType's is the most general substituion that can be +-- applied to both of them in order to yield the same result. +-- +-- >>> let m1 = TFun "a" "b" +-- >>> putPprLn m1 +-- a → b +-- >>> let m2 = TFun "c" (TEither "d" "e") +-- >>> putPprLn m2 +-- c → Either d e +-- >>> let inferSubst = unify (m1, m2) +-- >>> case runInfer inferSubst of Right subst -> putPprLn subst +-- { a ––> c +-- , b ––> Either d e } +unify :: (MType, MType) -> Infer Subst +unify = \case + (TFun a b, TFun x y) -> unifyBinary (a,b) (x,y) + (TVar v, x) -> v `bindVariableTo` x + (x, TVar v) -> v `bindVariableTo` x + (TConst a, TConst b) | a == b -> pure mempty + (TList a, TList b) -> unify (a,b) + (TEither a b, TEither x y) -> unifyBinary (a,b) (x,y) + (TTuple a b, TTuple x y) -> unifyBinary (a,b) (x,y) + (a, b) -> throw (CannotUnify a b) + + where + + -- Unification of binary type constructors, such as functions and Either. + -- Unification is first done for the first operand, and assuming the + -- required substitution, for the second one. + unifyBinary :: (MType, MType) -> (MType, MType) -> Infer Subst + unifyBinary (a,b) (x,y) = do + s1 <- unify (a, x) + s2 <- unify (applySubst s1 (b, y)) + pure (s1 <> s2) + + + +-- | Build a 'Subst'itution that binds a 'Name' of a 'TVar' to an 'MType'. The +-- resulting substitution should be idempotent, i.e. applying it more than once +-- to something should not be any different from applying it only once. +-- +-- - In the simplest case, this just means building a substitution that just +-- does that. +-- - Substituting a 'Name' with a 'TVar' with the same name unifies a type +-- variable with itself, and the resulting substitution does nothing new. +-- - If the 'Name' we're trying to bind to an 'MType' already occurs in that +-- 'MType', the resulting substitution would not be idempotent: the 'MType' +-- would be replaced again, yielding a different result. This is known as the +-- Occurs Check. +bindVariableTo :: Name -> MType -> Infer Subst + +bindVariableTo name (TVar v) | boundToSelf = pure mempty + where + boundToSelf = name == v + +bindVariableTo name mType | name `occursIn` mType = throw (OccursCheckFailed name mType) + where + n `occursIn` ty = n `S.member` freeMType ty + +bindVariableTo name mType = pure (Subst (M.singleton name mType)) + + + +-- ############################################################################# +-- ** Type inference +-- ############################################################################# + +-- $ Type inference is the act of finding out a value's type by looking at the +-- environment it is in, in order to make it compatible with it. +-- +-- In literature, the Hindley-Damas-Milner inference algorithm ("Algorithm W") +-- is often presented in the style of logical formulas, and below you'll find +-- that version along with code that actually does what they say. +-- +-- These formulas look a bit like fractions, where the "numerator" is a +-- collection of premises, and the denominator is the consequence if all of them +-- hold. +-- +-- __Example:__ +-- +-- @ +-- Γ ⊢ even : Int → Bool Γ ⊢ 1 : Int +-- ––––––––––––––––––––––––––––––––––– +-- Γ ⊢ even 1 : Bool +-- @ +-- +-- means that if we have a value of type @Int -> Bool@ called "even" and a value +-- of type @Int@ called @1@, then we also have a value of type @Bool@ via +-- @even 1@ available to us. +-- +-- The actual inference rules are polymorphic versions of this example, and +-- the code comments will explain each step in detail. + + + +-- ----------------------------------------------------------------------------- +-- *** The language: typed lambda calculus +-- ----------------------------------------------------------------------------- + + + +-- | The syntax tree of the language we'd like to typecheck. You can view it as +-- a close relative to simply typed lambda calculus, having only the most +-- necessary syntax elements. +-- +-- Since 'ELet' is non-recursive, the usual fixed-point function +-- @fix : (a → a) → a@ can be introduced to allow recursive definitions. +data Exp = ELit Lit -- ^ True, 1 + | EVar Name -- ^ @x@ + | EApp Exp Exp -- ^ @f x@ + | EAbs Name Exp -- ^ @λx. e@ + | ELet Name Exp Exp -- ^ @let x = e in e'@ (non-recursive) + deriving Show + + + +-- | Literals we'd like to support. Since we can't define new data types in our +-- simple type system, we'll have to hard-code the possible ones here. +data Lit = LBool Bool + | LInteger Integer + deriving Show + + + +-- | >>> putPprLn (EAbs "f" (EAbs "g" (EAbs "x" (EApp (EApp "f" "x") (EApp "g" "x"))))) +-- λf g x. f x (g x) +instance Pretty Exp where + ppr (ELit lit) = ppr lit + + ppr (EVar name) = ppr name + + ppr (EApp f x) = pprApp1 f <> " " <> pprApp2 x + where + pprApp1 = \case + eLet@ELet{} -> "(" <> ppr eLet <> ")" + eLet@EAbs{} -> "(" <> ppr eLet <> ")" + e -> ppr e + pprApp2 = \case + eApp@EApp{} -> "(" <> ppr eApp <> ")" + e -> pprApp1 e + + ppr x@EAbs{} = pprAbs True x + where + pprAbs True (EAbs name expr) = "λ" <> ppr name <> pprAbs False expr + pprAbs False (EAbs name expr) = " " <> ppr name <> pprAbs False expr + pprAbs _ expr = ". " <> ppr expr + + ppr (ELet name value body) = + "let " <> ppr name <> " = " <> ppr value <> " in " <> ppr body + +-- | >>> putPprLn (LBool True) +-- True +-- +-- >>> putPprLn (LInteger 127) +-- 127 +instance Pretty Lit where + ppr = \case + LBool b -> showT b + LInteger i -> showT i + where + showT :: Show a => a -> Text + showT = T.pack . show + +-- | >>> "var" :: Exp +-- EVar (Name "var") +instance IsString Exp where + fromString = EVar . fromString + + + +-- ----------------------------------------------------------------------------- +-- *** Some useful definitions +-- ----------------------------------------------------------------------------- + + + +-- | Generate a fresh 'Name' in a type 'Infer'ence context. An example use case +-- of this is η expansion, which transforms @f@ into @λx. f x@, where "x" is a +-- new name, i.e. unbound in the current context. +fresh :: Infer MType +fresh = drawFromSupply >>= \case + Right name -> pure (TVar name) + Left err -> throw err + + where + + drawFromSupply :: Infer (Either InferError Name) + drawFromSupply = Infer (do + s:upply <- lift get + lift (put upply) + pure (Right s) ) + + + +-- | Add a new binding to the environment. +-- +-- The Haskell equivalent would be defining a new value, for example in module +-- scope or in a @let@ block. This corresponds to the "comma" operation used in +-- formal notation, +-- +-- @ +-- Γ, x:σ ≡ extendEnv Γ (x,σ) +-- @ +extendEnv :: Env -> (Name, PType) -> Env +extendEnv (Env env) (name, pType) = Env (M.insert name pType env) + + + +-- ----------------------------------------------------------------------------- +-- *** Inferring the types of all language constructs +-- ----------------------------------------------------------------------------- + + + +-- | Infer the type of an 'Exp'ression in an 'Env'ironment, resulting in the +-- 'Exp's 'MType' along with a substitution that has to be done in order to reach +-- this goal. +-- +-- This is widely known as /Algorithm W/. +infer :: Env -> Exp -> Infer (Subst, MType) +infer env = \case + ELit lit -> inferLit lit + EVar name -> inferVar env name + EApp f x -> inferApp env f x + EAbs x e -> inferAbs env x e + ELet x e e' -> inferLet env x e e' + + + +-- | Literals such as 'True' and '1' have their types hard-coded. +inferLit :: Lit -> Infer (Subst, MType) +inferLit lit = pure (mempty, TConst litTy) + where + litTy = case lit of + LBool {} -> "Bool" + LInteger {} -> "Integer" + + + +-- | Inferring the type of a variable is done via +-- +-- @ +-- x:σ ∈ Γ τ = instantiate(σ) +-- –––––––––––––––––––––––––––– [Var] +-- Γ ⊢ x:τ +-- @ +-- +-- This means that if @Γ@ /literally contains/ (@∈@) a value, then it also +-- /entails it/ (@⊢@) in all its instantiations. +inferVar :: Env -> Name -> Infer (Subst, MType) +inferVar env name = do + sigma <- lookupEnv env name -- x:σ ∈ Γ + tau <- instantiate sigma -- τ = instantiate(σ) + -- ------------------ + pure (mempty, tau) -- Γ ⊢ x:τ + + + +-- | Look up the 'PType' of a 'Name' in the 'Env'ironment. +-- +-- This checks whether @x:σ@ is /literally contained/ in @Γ@. For more details +-- about this, see the documentation of 'Env'. +-- +-- To give a Haskell analogon, looking up @id@ when @Prelude@ is loaded, the +-- resulting 'PType' would be @id@'s type, namely @forall a. a -> a@. +lookupEnv :: Env -> Name -> Infer PType +lookupEnv (Env env) name = case M.lookup name env of + Just x -> pure x + Nothing -> throw (UnknownIdentifier name) + + + +-- | Bind all quantified variables of a 'PType' to 'fresh' type variables. +-- +-- __Example:__ instantiating @forall a. a -> b -> a@ results in the 'MType' +-- @c -> b -> c@, where @c@ is a fresh name (to avoid shadowing issues). +-- +-- You can picture the 'PType' to be the prototype converted to an instantiated +-- 'MType', which can now be used in the unification process. +-- +-- Another way of looking at it is by simply forgetting which variables were +-- quantified, carefully avoiding name clashes when doing so. +-- +-- 'instantiate' can also be seen as the opposite of 'generalize', which we'll +-- need later to convert an 'MType' to a 'PType'. +instantiate :: PType -> Infer MType +instantiate (Forall qs t) = do + subst <- substituteAllWithFresh qs + pure (applySubst subst t) + + where + -- For each given name, add a substitution from that name to a fresh type + -- variable to the result. + substituteAllWithFresh :: Set Name -> Infer Subst + substituteAllWithFresh xs = do + let freshSubstActions = M.fromSet (const fresh) xs + freshSubsts <- sequenceA freshSubstActions + pure (Subst freshSubsts) + + + +-- | Function application captures the fact that if we have a function and an +-- argument we can give to that function, we also have the result value of the +-- result type available to us. +-- +-- @ +-- Γ ⊢ f : fτ Γ ⊢ x : xτ fxτ = fresh unify(fτ, xτ → fxτ) +-- ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––– [App] +-- Γ ⊢ f x : fxτ +-- @ +-- +-- This rule says that given a function and a value with a type, the function +-- type has to unify with a function type that allows the value type to be its +-- argument. +inferApp + :: Env + -> Exp -- ^ __f__ x + -> Exp -- ^ f __x__ + -> Infer (Subst, MType) +inferApp env f x = do + (s1, fTau) <- infer env f -- f : fτ + (s2, xTau) <- infer (applySubst s1 env) x -- x : xτ + fxTau <- fresh -- fxτ = fresh + s3 <- unify (applySubst s2 fTau, TFun xTau fxTau) -- unify (fτ, xτ → fxτ) + let s = s3 <> s2 <> s1 -- -------------------- + pure (s, applySubst s3 fxTau) -- f x : fxτ + + + +-- | Lambda abstraction is based on the fact that when we introduce a new +-- variable, the resulting lambda maps from that variable's type to the type of +-- the body. +-- +-- @ +-- τ = fresh σ = ∀∅. τ Γ, x:σ ⊢ e:τ' +-- ––––––––––––––––––––––––––––––––––––– [Abs] +-- Γ ⊢ λx.e : τ→τ' +-- @ +-- +-- Here, @Γ, x:τ@ is @Γ@ extended by one additional mapping, namely @x:τ@. +-- +-- Abstraction is typed by extending the environment by a new 'MType', and if +-- under this assumption we can construct a function mapping to a value of that +-- type, we can say that the lambda takes a value and maps to it. +inferAbs + :: Env + -> Name -- ^ λ__x__. e + -> Exp -- ^ λx. __e__ + -> Infer (Subst, MType) +inferAbs env x e = do + tau <- fresh -- τ = fresh + let sigma = Forall [] tau -- σ = ∀∅. τ + env' = extendEnv env (x, sigma) -- Γ, x:σ … + (s, tau') <- infer env' e -- … ⊢ e:τ' + -- --------------- + pure (s, TFun (applySubst s tau) tau') -- λx.e : τ→τ' + + + +-- | A let binding allows extending the environment with new bindings in a +-- principled manner. To do this, we first have to typecheck the expression to +-- be introduced. The result of this is then generalized to a 'PType', since let +-- bindings introduce new polymorphic values, which are then added to the +-- environment. Now we can finally typecheck the body of the "in" part of the +-- let binding. +-- +-- Note that in our simple language, let is non-recursive, but recursion can be +-- introduced as usual by adding a primitive @fix : (a → a) → a@ if desired. +-- +-- @ +-- Γ ⊢ e:τ σ = gen(Γ,τ) Γ, x:σ ⊢ e':τ' +-- ––––––––––––––––––––––––––––––––––––––– [Let] +-- Γ ⊢ let x = e in e' : τ' +-- @ +inferLet + :: Env + -> Name -- ^ let __x__ = e in e' + -> Exp -- ^ let x = __e__ in e' + -> Exp -- ^ let x = e in __e'__ + -> Infer (Subst, MType) +inferLet env x e e' = do + (s1, tau) <- infer env e -- Γ ⊢ e:τ + let env' = applySubst s1 env + let sigma = generalize env' tau -- σ = gen(Γ,τ) + let env'' = extendEnv env' (x, sigma) -- Γ, x:σ + (s2, tau') <- infer env'' e' -- Γ ⊢ … + -- -------------------------- + pure (s2 <> s1, tau') -- … let x = e in e' : τ' + + + +-- | Generalize an 'MType' to a 'PType' by universally quantifying over all the +-- type variables contained in it, except those already free in the environment. +-- +-- >>> let tau = TFun "a" (TFun "b" "a") +-- >>> putPprLn tau +-- a → b → a +-- >>> putPprLn (generalize (Env [("x", Forall [] "b")]) tau) +-- ∀a. a → b → a +-- +-- In more formal notation, +-- +-- @ +-- gen(Γ,τ) = ∀{α}. τ +-- where {α} = free(τ) – free(Γ) +-- @ +-- +-- 'generalize' can also be seen as the opposite of 'instantiate', which +-- converts a 'PType' to an 'MType'. +generalize :: Env -> MType -> PType +generalize env mType = Forall qs mType + where + qs = freeMType mType `S.difference` freeEnv env diff --git a/Main.hs b/Main.hs new file mode 100644 index 0000000..198db12 --- /dev/null +++ b/Main.hs @@ -0,0 +1,185 @@ +{-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE OverloadedStrings #-} + +module Main where + + + +import qualified Data.Map as M +import Data.Monoid +import Data.Text (Text) +import qualified Data.Text.IO as T + +import HindleyMilner + + + +-- ############################################################################# +-- ############################################################################# +-- * Testing +-- ############################################################################# +-- ############################################################################# + + + +-- ############################################################################# +-- ** A small custom Prelude +-- ############################################################################# + + + +prelude :: Env +prelude = Env (M.fromList + [ ("(*)", Forall [] (tInteger ~> tInteger ~> tInteger)) + , ("(+)", Forall [] (tInteger ~> tInteger ~> tInteger)) + , ("(,)", Forall ["a","b"] ("a" ~> "b" ~> TTuple "a" "b")) + , ("(-)", Forall [] (tInteger ~> tInteger ~> tInteger)) + , ("(.)", Forall ["a", "b", "c"] (("b" ~> "c") ~> ("a" ~> "b") ~> "a" ~> "c")) + , ("(<)", Forall [] (tInteger ~> tInteger ~> tBool)) + , ("(<=)", Forall [] (tInteger ~> tInteger ~> tBool)) + , ("(>)", Forall [] (tInteger ~> tInteger ~> tBool)) + , ("(>=)", Forall [] (tInteger ~> tInteger ~> tBool)) + , ("const", Forall ["a","b"] ("a" ~> "b" ~> "a")) + , ("Cont/>>=", Forall ["a"] ((("a" ~> "r") ~> "r") ~> ("a" ~> (("b" ~> "r") ~> "r")) ~> (("b" ~> "r") ~> "r"))) + , ("find", Forall ["a","b"] (("a" ~> tBool) ~> TList "a" ~> tMaybe "a")) + , ("fix", Forall ["a"] (("a" ~> "a") ~> "a")) + , ("foldr", Forall ["a","b"] (("a" ~> "b" ~> "b") ~> "b" ~> TList "a" ~> "b")) + , ("id", Forall ["a"] ("a" ~> "a")) + , ("ifThenElse", Forall ["a"] (tBool ~> "a" ~> "a" ~> "a")) + , ("Left", Forall ["a","b"] ("a" ~> TEither "a" "b")) + , ("length", Forall ["a"] (TList "a" ~> tInteger)) + , ("map", Forall ["a","b"] (("a" ~> "b") ~> TList "a" ~> TList "b")) + , ("reverse", Forall ["a"] (TList "a" ~> TList "a")) + , ("Right", Forall ["a","b"] ("b" ~> TEither "a" "b")) + , ("[]", Forall ["a"] (TList "a")) + , ("(:)", Forall ["a"] ("a" ~> TList "a" ~> TList "a")) + ]) + where + tBool = TConst "Bool" + tInteger = TConst "Integer" + tMaybe = TEither (TConst "()") + + + +-- | Synonym for 'TFun' to make writing type signatures easier. +-- +-- Instead of +-- +-- @ +-- Forall ["a","b"] (TFun "a" (TFun "b" "a")) +-- @ +-- +-- we can write +-- +-- @ +-- Forall ["a","b"] ("a" ~> "b" ~> "a") +-- @ +(~>) :: MType -> MType -> MType +(~>) = TFun +infixr 9 ~> + + + +-- ############################################################################# +-- ** Run it! +-- ############################################################################# + + + +-- | Run type inference on a cuple of values +main :: IO () +main = do + let inferAndPrint = T.putStrLn . (" " <>) . showType prelude + T.putStrLn "Well-typed:" + do + inferAndPrint (lambda ["x"] "x") + inferAndPrint (lambda ["f","g","x"] (apply "f" ["x", apply "g" ["x"]])) + inferAndPrint (lambda ["f","g","x"] (apply "f" [apply "g" ["x"]])) + inferAndPrint (lambda ["m", "k", "c"] (apply "m" [lambda ["x"] (apply "k" ["x", "c"])])) -- >>= for Cont + inferAndPrint (lambda ["f"] (apply "(.)" ["reverse", apply "map" ["f"]])) + inferAndPrint (apply "find" [lambda ["x"] (apply "(>)" ["x", int 0])]) + inferAndPrint (apply "map" [apply "map" ["map"]]) + inferAndPrint (apply "(*)" [int 1, int 2]) + inferAndPrint (apply "foldr" ["(+)", int 0]) + inferAndPrint (apply "map" ["length"]) + inferAndPrint (apply "map" ["map"]) + inferAndPrint (lambda ["x"] (apply "ifThenElse" [apply "(<)" ["x", int 0], int 0, "x"])) + inferAndPrint (lambda ["x"] (apply "fix" [lambda ["xs"] (apply "(:)" ["x", "xs"])])) + T.putStrLn "Ill-typed:" + do + inferAndPrint (apply "(*)" [int 1, bool True]) + inferAndPrint (apply "foldr" [int 1]) + inferAndPrint (lambda ["x"] (apply "x" ["x"])) + inferAndPrint (lambda ["x"] (ELet "xs" (apply "(:)" ["x", "xs"]) "xs")) + + + +-- | Build multiple lambda bindings. +-- +-- Instead of +-- +-- @ +-- EAbs "f" (EAbs "x" (EApp "f" "x")) +-- @ +-- +-- we can write +-- +-- @ +-- lambda ["f", "x"] (EApp "f" "x") +-- @ +-- +-- for +-- +-- @ +-- λf x. f x +-- @ +lambda :: [Name] -> Exp -> Exp +lambda names expr = foldr EAbs expr names + + + +-- | Apply a function to multiple arguments. +-- +-- Instead of +-- +-- @ +-- EApp (EApp (EApp "f" "x") "y") "z") +-- @ +-- +-- we can write +-- +-- @ +-- apply "f" ["x", "y", "z"] +-- @ +-- +-- for +-- +-- @ +-- f x y z +-- @ +apply :: Exp -> [Exp] -> Exp +apply = foldl EApp + + + +-- | Construct an integer literal. +int :: Integer -> Exp +int = ELit . LInteger + + + +-- | Construct a boolean literal. +bool :: Bool -> Exp +bool = ELit . LBool + + + +-- | Convenience function to run type inference algorithm +showType :: Env -- ^ Starting environment, e.g. 'prelude'. + -> Exp -- ^ Expression to typecheck + -> Text -- ^ Text representation of the result. Contains an error + -- message on failure. +showType env expr = + case (runInfer . fmap (generalize (Env mempty) . uncurry applySubst) . infer env) expr of + Left err -> "Error inferring type of " <> ppr expr <>": " <> ppr err + Right ty -> ppr expr <> " :: " <> ppr ty