R/mm.R

#' Markov model specification
#' 
#' @description Creates a model specification of a Markov model.
#' 
#' @param states Vector of state space of length s.
#' @param k Order of the Markov chain.
#' @param init Vector of initial distribution of length s ^ k.
#' @param ptrans Matrix of transition probabilities of dimension \eqn{(s, s)}.
#' @return An object of class [mm].
#' 
#' @seealso [simulate.mm], [fitmm]
#' 
#' @export
#' 
#' @examples
#' states <- c("a", "c", "g", "t")
#' s <- length(states)
#' k <- 1
#' init <- rep.int(1 / s, s)
#' p <- matrix(c(0, 0, 0.3, 0.4, 0, 0, 0.5, 0.2, 0.7, 0.5, 
#'               0, 0.4, 0.3, 0.5, 0.2, 0), ncol = s)
#' 
#' # Specify a Markov model of order 1
#' markov <- mm(states = states, init = init, ptrans = p, k = k)
#'
#' @testexamples
#' expect_true(all(markov$param == p))
#' expect_true(all(markov$init == init))
#' 
mm <- function(states, init, ptrans, k = 1) {
  
  #############################
  # Checking parameter states
  #############################
  
  s <- length(states)
  
  if (!(is.vector(states) & (length(unique(states)) == s))) {
    stop("The state space 'states' is not a vector of unique elements")
  }
  
  #############################
  # Checking parameter init
  #############################
  
  if (!(is.numeric(init) & !anyNA(init) & is.vector(init) & length(init) == s ^ k)) {
    stop("'init' is not a numeric vector of length s ^ k")
  }
  
  if (!(all(init >= 0) & all(init <= 1))) {
    stop("Probabilities in 'init' must be between [0, 1]")
  }
  
  if (!((sum(init) >= 1 - sqrt(.Machine$double.eps)) | (sum(init) <= 1 + sqrt(.Machine$double.eps)))) {
    stop("The sum of 'init' is not equal to one")
  }
  
  #############################
  # Checking parameter k
  #############################
  
  if (!((k > 0) & ((k %% 1) == 0))) {
    stop("'k' must be a strictly positive integer")
  }
  
  #############################
  # Checking parameter ptrans
  #############################
  
  if (!(is.numeric(ptrans) & !anyNA(ptrans) & is.matrix(ptrans))) {
    stop("'ptrans' is not a matrix with numeric values")
  }
  
  if (!((dim(ptrans)[1] == s ^ k) & (dim(ptrans)[2] == s))) {
    stop("The dimension of the matrix 'ptrans' must be equal to (s ^ k, s)")
  }
  
  if (!(all(ptrans >= 0) & all(ptrans <= 1))) {
    stop("Probabilities in 'ptrans' must be between [0, 1]")
  }
  
  if (!all((apply(ptrans, 1, sum) >= 1 - sqrt(.Machine$double.eps)) | (apply(ptrans, 1, sum) <= 1 + sqrt(.Machine$double.eps)))) {
    stop("'ptrans' is not a stochastic matrix (column sums accross rows must be equal to one for each row)")
  }
  
  
  # Add names to the attributes init, ptrans, for readability
  colnames(ptrans) <- words(length = 1, alphabet = states)
  row.names(ptrans) <- words(length = k, alphabet = states)
  names(init) <- row.names(ptrans)
  
  ans <- list(states = states, s = s, k = k, init = init, ptrans = ptrans)
  
  class(ans) <- "mm"
  
  return(ans)
}


#' Function to check if an object is of class `mm`
#' 
#' @description `is.mm` returns `TRUE` if `x` is an object of 
#'   class `mm`.
#' 
#' @param x An arbitrary R object.
#' @return `is.mm` returns `TRUE` or `FALSE` depending on whether `x` is an 
#'   object of class `mm` or not.
#' 
#' @export
#' 
is.mm <- function(x) {
  inherits(x, "mm")
}


# Method to get the stationary distribution
#' @export
get.stationaryDistribution.mm <- function(x) {
  
  p <- .stationaryDistribution(ptrans = .blockMatrix(ptrans = x$ptrans))
  
  return(p)
}


# Method to get the number of parameters
# (useful for the computation of criteria such as AIC and BIC)
.get.Kpar.mm <- function(x) {
  
  s <- x$s
  
  kpar <- (s - 1) * s ^ x$k
  
  return(kpar)
}


# Method to get the number of parameters
# (useful for the computation of criteria such as AIC and BIC)
#' @export
get.Kpar.mm <- function(x) {
  
  kpar <- .get.Kpar.mm(x)
  
  return(kpar)
}


#' Log-likelihood Function
#' 
#' @description Computation of the log-likelihood for a Markov model
#' 
#' @param x An object of class [mm].
#' @param processes An object of class `processesMarkov`.
#' 
#' @noRd
#' 
.logLik.mm <- function(x, processes) {
  
  #############################
  # Let's compute the log-likelihood
  #############################
  
  Nstarti <- processes$Nstarti
  maskNstarti <- processes$Nstarti != 0 & x$init != 0
  
  Nij <- processes$Nij
  maskNij <- processes$Nij != 0 & x$ptrans != 0
  
  logLik <- sum(Nstarti[maskNstarti] * log(x$init[maskNstarti])) + sum(Nij[maskNij] * log(x$ptrans[maskNij]))
  
  return(logLik)
  
}


#' Akaike Information Criterion (AIC)
#' 
#' @description Computation of the Akaike Information Criterion.
#' 
#' @param x An object of class [mm].
#' @param sequences A list of vectors representing the sequences for which the 
#'   AIC will be computed based on `x`.
#' @return Value of the AIC.
#' 
#' @noRd
#' 
#' @export
#' 
AIC.mm <- function(object, ...) {
  
  sequences = list(...)[1]
  logLik <- logLik(object, sequences)
  
  kpar <- .get.Kpar(object)
  
  AIC <- -2 * logLik + 2 * kpar
  
  return(AIC)
  
}


#' Bayesian Information Criterion (BIC)
#' 
#' @description Computation of the Bayesian Information Criterion.
#' 
#' @param x An object of class [mm].
#' @param sequences A list of vectors representing the sequences for which the 
#'   BIC will be computed based on `x`.
#' @return Value of the BIC.
#' 
#' @noRd
#' 
#' @export
#' 
BIC.mm <- function(object, ...) {
  
  sequences = list(...)[1]
  logLik <- logLik(object, sequences)
  
  kpar <- .get.Kpar(object)
  
  n <- sum(sapply(sequences, length))
  
  BIC <- -2 * logLik + log(n) * kpar
  
  return(BIC)
}


#' Loglikelihood
#' 
#' @description Computation of the log-likelihood for a Markov model
#' 
#' @param x An object of class [mm].
#' @param sequences A list of vectors representing the sequences for which the 
#'   log-likelihood will be computed based on `x`.
#' @return Value of the log-likelihood.
#' 
#' @noRd
#' 
#' @export
#' 
logLik.mm <- function(object, ...) {

  sequences = list(...)[1]
  
  #############################
  # Checking parameters sequences and states
  #############################
  
  if (!(is.list(sequences) & all(sapply(sequences, class) %in% c("character", "numeric")))) {
    stop("The parameter 'sequences' should be a list of vectors")
  }
  
  if (!all(unique(unlist(sequences)) %in% object$states)) {
    stop("Some states in the list of observed sequences 'sequences' 
         are not in the state space given by the model 'object'")
  }
  
  processes <- processesMarkov(sequences = sequences, states = object$states, k = object$k, verbose = FALSE)
  logLik <- .logLik.mm(x = object, processes = processes)
  
  return(logLik)
  
}


#' Simulates k-th order Markov chains
#' 
#' @description Simulates k-th order Markov chains.
#' 
#' @details If `nsim` is a single integer then a chain of that length is 
#'   produced. If `nsim` is a vector of integers, then `length(nsim)` 
#'   sequences are generated with respective lengths.
#' 
#' @param object An object of class [mm].
#' @param nsim An integer or vector of integers (for multiple sequences) 
#'   specifying the length of the sequence(s).
#' @param seed Optional. `seed` for the random number generator. 
#'   If no `seed` is given, then seed is set by using the command 
#'   `set.seed(round(as.numeric(Sys.time()))`.
#' @param ... further arguments passed to or from other methods.
#' @return A list of vectors representing the sequences.
#' 
#' @seealso [mm], [fitmm]
#' 
#' @export
#' 
#' @examples 
#' states <- c("a", "c", "g", "t")
#' s <- length(states)
#' k <- 2
#' init <- rep.int(1 / s ^ k, s ^ k)
#' p <- matrix(0.25, nrow = s ^ k, ncol = s)
#' 
#' # Specify a Markov model of order 1
#' markov <- mm(states = states, init = init, ptrans = p, k = k)
#' 
#' seqs <- simulate(object = markov, nsim = c(1000, 10000, 2000), seed = 150)
#'
#' @testexamples
#' expect_equal(length(seqs), 3)
#' expect_equal(sapply(seqs, length), c(1000, 10000, 2000))
#' expect_equal(seqs[[1]][995:1000], c("g","c","a","c","a","t"))
#'
simulate.mm <- function(object, nsim = 1, seed = NULL, ...) {
  
  #############################
  # Checking parameter nsim
  #############################
  
  if (!all(is.numeric(nsim), is.vector(nsim), !anyNA(nsim), nsim > 0, (nsim %% 1) == 0)) {
    stop("'nsim' must be a strictly positive integer or a vector of striclty positive integers")
  }
  
  #############################
  # Checking parameter seed
  #############################
  
  if (!is.null(seed)) {
    if (!all(is.numeric(seed), seed >= 0, (seed %% 1) == 0)) {
      stop("'seed' must be a positive integer")
    }
    {
      set.seed(seed)
    }
  }
  
  
  s <- length(object$states)
  out <- list()
  nbseq <- length(nsim)
  
  for (n in 1:nbseq) {
    
    if (nsim[n] <= object$k) {
      
      y <- s2c(sample(x = names(object$init), size = 1, prob = object$init))
    
    } else {
    
      y <- rep.int(NA, nsim[n])
      
      # Initial state(s)
      y[1:object$k] <- s2c(sample(x = names(object$init), size = 1, prob = object$init))
      
      for (i in 1:(nsim[n] - object$k)) {
        ind <- which(object$states == y[i + object$k - 1])
        if (object$k > 1) {
          for (j in (object$k - 2):0) {
            ind <- ind + s ^ (j + 1) * (which(object$states == y[i + j]) - 1)
          }
        }
        y[i + object$k] <- sample(object$states, 1, prob = object$ptrans[ind, ])
      }
      
    }
    
    out[[n]] <- y
    
  }
  
  return(out)
  
}