Inference.hs

{-# OPTIONS_GHC -Wall -fwarn-incomplete-patterns -fwarn-tabs #-}
{-# LANGUAGE FlexibleInstances #-}

module Inference where

import Language
import Builtin

import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.Maybe as Maybe
import Text.Printf
import Control.Monad.Writer
import Control.Monad.State
import Control.Monad.Except


-- Types and useful functions to express substitution.

type TypeVariable = String

type SSubst = Map TypeVariable Stack
type VSubst = Map TypeVariable ValueType

-- Composes stack substitutions.
afterSSubst :: SSubst -> SSubst -> SSubst
s1 `afterSSubst` s2 = Map.map (stackSubst s1) s2 `Map.union` s1

-- Composes value substitutions.
afterVSubst :: VSubst -> VSubst -> VSubst
s1 `afterVSubst` s2 = Map.map (valueSubst s1) s2 `Map.union` s1


-- A type class for types in which we can substitute. There are instances for
-- both [Stack] and [ValueType], which are mutually recursive; the substitution
-- implementations are also mutually recursive.

-- One may wonder why [stackVars] and [valueVars] return lists instead of types.
-- This was so that [freshen], defined below, would actually return a "canonical
-- freshening", which was useful for testing purposes.

class Substitutable a where
  stackVars :: a -> [TypeVariable]
  valueVars :: a -> [TypeVariable]
  stackSubst :: SSubst -> a -> a
  valueSubst :: VSubst -> a -> a
  subst :: SSubst -> VSubst -> a -> a
  subst ss vs = valueSubst vs . stackSubst ss

instance Substitutable Stack where
  stackVars (S a s) = a : concatMap stackVars s
  valueVars (S _ s) = concatMap valueVars s
  stackSubst ss (S a s) = stk
    where s' = map (stackSubst ss) s
    -- if there is a mapping, treat it as a prefix of the other stack
          stk = maybe (S a s') (\(S a' s'') -> S a' (s'' ++ s'))
                      (Map.lookup a ss)
  valueSubst vs (S a s) = S a (map (valueSubst vs) s)

instance Substitutable ValueType where
  stackVars (VListTy t)       = stackVars t
  stackVars (VFuncTy (F l r)) = stackVars l ++ stackVars r
  stackVars _                 = []

  valueVars VIntTy            = []
  valueVars VBoolTy           = []
  valueVars (VListTy t)       = valueVars t
  valueVars (VFuncTy (F l r)) = valueVars l ++ valueVars r
  valueVars (VVarTy v)        = [v]

  stackSubst _ VIntTy             = VIntTy
  stackSubst _ VBoolTy            = VBoolTy
  stackSubst ss (VListTy t)       = VListTy $ stackSubst ss t
  stackSubst ss (VFuncTy (F l r)) =
    VFuncTy $ F (stackSubst ss l) (stackSubst ss r)
  stackSubst _ t@(VVarTy _)       = t

  valueSubst _ VIntTy             = VIntTy
  valueSubst _ VBoolTy            = VBoolTy
  valueSubst vs (VListTy t)       = VListTy $ valueSubst vs t
  valueSubst vs (VFuncTy (F l r)) =
    VFuncTy $ F (valueSubst vs l) (valueSubst vs r)
  valueSubst vs t@(VVarTy v)      = Maybe.fromMaybe t (Map.lookup v vs)

instance Substitutable FuncType where
  stackVars (F l r) = stackVars l ++ stackVars r
  valueVars (F l r) = valueVars l ++ valueVars r
  stackSubst ss (F l r) = F (stackSubst ss l) (stackSubst ss r)
  valueSubst vs (F l r) = F (valueSubst vs l) (valueSubst vs r)


-- Constraints which may be placed on types. We may have a constraint that two
-- value types are equal, or a constraint that two stacks are equal.

data VConstraint = VEqual ValueType ValueType
  deriving (Eq)
data SConstraint = SEqual Stack Stack
  deriving (Eq)

instance Show VConstraint where
  show (VEqual s t) = show s ++ " :~: " ++ show t
instance Show SConstraint where
  show (SEqual s t) = show s ++ " :~: " ++ show t

-- Equates two stack types.
equateSTy :: Monad m => Stack -> Stack -> WriterT [SConstraint] m ()
equateSTy s t | s == t    = return ()
              | otherwise = tell [SEqual s t]
-- We don't need a function to equate value types like this; VConstraints are
-- only ever generated by the stack constraint solver.


-- The type-checking and inference monad, plus the function that generates
-- constraints from an untyped term.

type TC a =
  WriterT [SConstraint]
  (StateT Int
  (Either String))
  a

freshVVar :: TC TypeVariable
freshVVar =
  do i <- get
     put $ succ i
     return $ "t" ++ show i

freshSVar :: TC TypeVariable
freshSVar =
  do i <- get
     put $ succ i
     return $ "s" ++ show i

-- Generates fresh variables for everything within a function type, recursively.
freshen :: FuncType -> TC FuncType
freshen t =
  do let sVars = stackVars t
         vVars = valueVars t
     newSVars <- mapM (\v -> freshSVar >>= \v' -> return (v, S v' [])) sVars
     newVVars <- mapM (\v -> freshVVar >>= \v' -> return (v, VVarTy v')) vVars
     return $ subst (Map.fromList newSVars) (Map.fromList newVVars) t

runTC :: TC a -> Either String (a, [SConstraint])
runTC m = evalStateT (runWriterT m) 0

type Context = Map String FuncType

inferType :: Context -> Term () -> TC (Term FuncType)

inferType _ (IdTerm ()) =
  do a <- freshSVar
     return . IdTerm $ F (S a []) (S a [])

inferType c (CatTerm () t u) =
  do t' <- inferType c t
     u' <- inferType c u
     let F (S a1 t1) (S b1 u1) = extract t'
     let F (S a2 t2) (S b2 u2) = extract u'
     equateSTy (S b1 u1) (S a2 t2)
     return $ CatTerm (F (S a1 t1) (S b2 u2)) t' u'

inferType c (BuiltinTerm () s) =
  case Map.lookup s c of
    Just t  -> flip BuiltinTerm s <$> freshen t
    Nothing -> throwError $ "unbound identifier: " ++ s

inferType _ (PushIntTerm () i) =
  do a <- freshSVar
     return $ PushIntTerm (F (S a []) (S a [VIntTy])) i

inferType _ (PushBoolTerm () b) =
  do a <- freshSVar
     return $ PushBoolTerm (F (S a []) (S a [VBoolTy])) b

inferType _ (PushNilTerm ()) =
  do a <- freshSVar
     b <- freshVVar
     return $ PushNilTerm (F (S a []) (S a [VListTy $ VVarTy b]))

inferType c (PushFuncTerm () t) =
  do t' <- inferType c t
     a <- freshSVar
     return $ PushFuncTerm (F (S a []) (S a [VFuncTy $ extract t'])) t'

genConstraints :: Term () -> Either String (Term FuncType, [SConstraint])
genConstraints = runTC . inferType builtinTypes


-- Functions to compute the most general unifiers of two stacks and of two value
-- types. This allows us to solve the generated constraints.

-- This is a little non-obvious. When we do a stack variable assignment, we are
-- constraining one stack variable to be equal to the prefix of another stack.
-- So we have to strip off the common suffix of the two stacks before generating
-- the constraint.
mguStack :: Stack -> Stack -> WriterT [VConstraint] (Either String) SSubst
mguStack (S a s) (S b t) | length s < length t = mguStack (S b t) (S a s)
                         | otherwise =
  do tell $ zipWith VEqual (reverse s) (reverse t)
     lift $ stackVarAsgn b (S a (take (length s - length t) s))

mguValue :: ValueType -> ValueType -> Either String VSubst
mguValue VIntTy VIntTy = return Map.empty
mguValue VBoolTy VBoolTy = return Map.empty
mguValue (VListTy t1) (VListTy t2) = mguValue t1 t2
mguValue (VFuncTy (F (S _ s) (S _ t))) (VFuncTy (F (S _ s') (S _ t'))) =
  if (length s, length t) == (length s', length t')
    then foldM (\vs (vt1, vt2) ->
                 afterVSubst vs <$>
                 mguValue (valueSubst vs vt1) (valueSubst vs vt2))
               Map.empty (zip (s ++ t) (s' ++ t'))
    else throwError $ printf "VFuncTy stacks mismatched: %s -> %s <> %s -> %s"
                             (show s) (show t) (show s') (show t')
mguValue (VVarTy v) t = valueVarAsgn v t
mguValue t (VVarTy v) = valueVarAsgn v t
mguValue _ _          = throwError "structural mismatch"

stackVarAsgn :: TypeVariable -> Stack -> Either String SSubst
stackVarAsgn a s
  | s == S a [] = return Map.empty
  | a `elem` stackVars s =
      throwError $ "occurs check fails: " ++ show a ++ " in " ++ show s
  | otherwise = return $ Map.singleton a s

valueVarAsgn :: TypeVariable -> ValueType -> Either String VSubst
valueVarAsgn a t
  | t == VVarTy a = return Map.empty
  | a `elem` valueVars t =
      throwError $ "occurs check fails: " ++ show a ++ " in " ++ show t
  | otherwise = return $ Map.singleton a t

solveStack :: [SConstraint] -> Either String (SSubst, [VConstraint])
solveStack =
  foldM (\(subst1, vcs1) (SEqual s t) -> do
          (subst2, vcs2) <- runWriterT $
                            mguStack (stackSubst subst1 s) (stackSubst subst1 t)
          return (subst2 `afterSSubst` subst1, vcs1 ++ vcs2)) (Map.empty, [])

solveValue :: SSubst -> [VConstraint] -> Either String VSubst
solveValue ss =
  foldM (\subst1 (VEqual t1 t2) -> do
          subst2 <- mguValue (subst ss subst1 t1) (subst ss subst1 t2)
          return $ subst2 `afterVSubst` subst1) Map.empty


-- The overall type inference algorithm. It produces a term with type
-- annotations. The REPL prints the main type, but the annotated abstract syntax
-- is only really used in formulating one of our QC properties.

typeInference :: Term () -> Either String (Term FuncType)
typeInference t =
  do (t', scs) <- genConstraints t
     (ss, vcs) <- solveStack scs
     vs <- solveValue ss vcs
     return $ fmap (subst ss vs) t'

-- Performs type inference with the additional constraint that the resulting
-- term must be executable on an empty stack.
typeInferenceOnEmpty :: Term () -> Either String (Term FuncType)
typeInferenceOnEmpty t =
  do t' <- typeInference t
     let F (S _ s) _ = extract t'
     if null s
       then return t'
       else throwError $ "term expecting a non-empty stack: " ++ show s