From 404da436e9ccbdc1aba245badebc25991328d527 Mon Sep 17 00:00:00 2001
From: DanielaCorbetta <corbettadaniela@gmail.com>
Date: Wed, 22 May 2024 12:01:13 -0400
Subject: [PATCH] indent

---
 R/getPredictionSets.R           | 193 +++++++++++++------------
 R/old/ancestors.R               |  11 +-
 R/old/children.R                |  11 +-
 R/old/conformal.R               | 138 +++++++++---------
 R/old/getHierarchicalPredSets.R | 112 ++++++++-------
 R/old/predSets.R                |  25 ++--
 R/old/scores.R                  |   2 +-
 R/old/somegraphs.R              | 126 ++++++++--------
 R/old/utils_Hier.R              | 160 +++++++++++----------
 R/old/utils_Resample.R          | 248 ++++++++++++++++----------------
 R/utils_ConformalPS.R           |  18 +--
 R/utils_HierarchicalPS.R        |  89 +++++++-----
 R/utils_Resample.R              |  32 +++--
 man/getPredictionSets.Rd        |  32 ++---
 vignettes/vignette.Rmd          |  10 +-
 15 files changed, 624 insertions(+), 583 deletions(-)

diff --git a/R/getPredictionSets.R b/R/getPredictionSets.R
index 7c1a408..e4385e2 100644
--- a/R/getPredictionSets.R
+++ b/R/getPredictionSets.R
@@ -109,7 +109,7 @@
 #' @examples
 #' # random p matrix
 #' set.seed(1040)
-#' p <- matrix(rnorm(2000*4), ncol=4)
+#' p <- matrix(rnorm(2000 * 4), ncol = 4)
 #' # Normalize the matrix p to have all numbers between 0 and 1 that sum to 1
 #' # by row
 #' p <- exp(p - apply(p, 1, max))
@@ -118,25 +118,25 @@
 #' colnames(p) <- cell.types
 #'
 #' # Take 1000 rows as calibration and 1000 as test
-#' p.cal <- p[1:1000,]
-#' p.test <- p[1001:2000,]
+#' p.cal <- p[1:1000, ]
+#' p.test <- p[1001:2000, ]
 #'
 #' # Randomly create the vector of real cell types for p.cal and p.test
-#' y.cal <- sample(cell.types, 1000, replace=TRUE)
-#' y.test <- sample(cell.types, 1000, replace=TRUE)
+#' y.cal <- sample(cell.types, 1000, replace = TRUE)
+#' y.test <- sample(cell.types, 1000, replace = TRUE)
 #'
 #' # Obtain conformal prediction sets
-#' conf.sets <- getPredictionSets(x.query=p.test,
-#'                                x.cal=p.cal,
-#'                                y.cal=y.cal,
-#'                                onto=NULL,
-#'                                alpha=0.1,
-#'                                follow.ontology=FALSE,
-#'                                resample.cal=FALSE,
-#'                                labels=cell.types,
-#'                                return.sc=FALSE
-#'                                )
-#'
+#' conf.sets <- getPredictionSets(
+#'     x.query = p.test,
+#'     x.cal = p.cal,
+#'     y.cal = y.cal,
+#'     onto = NULL,
+#'     alpha = 0.1,
+#'     follow.ontology = FALSE,
+#'     resample.cal = FALSE,
+#'     labels = cell.types,
+#'     return.sc = FALSE
+#' )
 #'
 #' @importFrom foreach %dopar%
 #' @importFrom foreach foreach
@@ -147,52 +147,53 @@
 #' @importFrom stats quantile
 #' @export
 
-getPredictionSets <- function(x.query, x.cal, y.cal, onto=NULL, alpha = 0.1,
-                              lambdas = seq(0.001,0.999,length.out=100),
-                              follow.ontology=TRUE,
-                              resample.cal=FALSE,
-                              labels=NULL,
-                              return.sc=NULL,
-                              pr.name="pred.set"){
-
+getPredictionSets <- function(x.query, x.cal, y.cal, onto = NULL, alpha = 0.1,
+    lambdas = seq(0.001, 0.999, length.out = 100),
+    follow.ontology = TRUE,
+    resample.cal = FALSE,
+    labels = NULL,
+    return.sc = NULL,
+    pr.name = "pred.set") {
     ## Sanity checks
 
-    if(follow.ontology & is.null(onto)){
+    if (follow.ontology & is.null(onto)) {
         stop("An ontology is required for hierarchical prediction set.
              Please provide one or ask for conformal prediction set
              (follow.ontology=FALSE)")
     }
 
-    if(is.null(onto) & is.null(labels)){
+    if (is.null(onto) & is.null(labels)) {
         stop("Please provide cell labels (labels parameter)")
     }
 
-    if(isa(x.query, "SpatialExperiment") |
-       isa(x.query, "SingleCellExperiment") |
-       isa(x.query, "SummarizedExperiment"))
+    if (isa(x.query, "SpatialExperiment") |
+        isa(x.query, "SingleCellExperiment") |
+        isa(x.query, "SummarizedExperiment")) {
         sc <- TRUE
-    else if(is.matrix(x.query))
+    } else if (is.matrix(x.query)) {
         sc <- FALSE
-    else
+    } else {
         stop("Please provide as input in x.query a SpatialExperiment,
               SingleCellExperiment or a matrix")
+    }
 
-    if(!is.null(return.sc)){
-      if(return.sc==TRUE & !sc){
-        stop("If x.query is a matrix output has to be a list
+    if (!is.null(return.sc)) {
+        if (return.sc == TRUE & !sc) {
+            stop("If x.query is a matrix output has to be a list
                  (return.sc=FALSE)")
-      }
+        }
     }
 
     # Retrieve labels from the ontology (need to add retrieval from y.cal/data
     # when follow.ontology=FALSE)
-    if(is.null(labels))
-        labels <- V(onto)$name[degree(onto, mode="out")==0]
+    if (is.null(labels)) {
+        labels <- V(onto)$name[degree(onto, mode = "out") == 0]
+    }
     K <- length(labels)
 
     # If input is not a matrix, retrieve prediction matrix from colData
     # might turn this into a helper function
-    if(!is.matrix(x.query)){
+    if (!is.matrix(x.query)) {
         # n.query <- ncol(x.query)
         # p.query <- matrix(NA, nrow=n.query, ncol=K)
         # colnames(p.query) <- labels
@@ -200,10 +201,11 @@ getPredictionSets <- function(x.query, x.cal, y.cal, onto=NULL, alpha = 0.1,
         #     p.query[,i] <- colData(x.query)[[i]]
         # }
         p.query <- .retrievePredMatrix(x.query)
+    } else {
+        p.query <- x.query
     }
-    else p.query <- x.query
 
-    if(!is.matrix(x.cal)){
+    if (!is.matrix(x.cal)) {
         # n.cal <- ncol(x.cal)
         # p.cal <- matrix(NA, nrow=n.cal, ncol=K)
         # colnames(p.cal) <- labels
@@ -211,48 +213,62 @@ getPredictionSets <- function(x.query, x.cal, y.cal, onto=NULL, alpha = 0.1,
         #     p.cal[,i] <- colData(x.cal)[[i]]
         # }
         p.cal <- .retrievePredMatrix(x.cal)
+    } else {
+        p.cal <- x.cal
     }
-    else p.cal <- x.cal
 
-    if(!resample.cal){
-        if (follow.ontology){
-            pred.sets <- .getHierarchicalPredSets(p.cal=p.cal, p.test=p.query,
-                                                  y.cal=y.cal, onto=onto,
-                                                  alpha=alpha,
-                                                  lambdas=lambdas)
+    if (!resample.cal) {
+        if (follow.ontology) {
+            pred.sets <- .getHierarchicalPredSets(
+                p.cal = p.cal, p.test = p.query,
+                y.cal = y.cal, onto = onto,
+                alpha = alpha,
+                lambdas = lambdas
+            )
+        } else {
+            pred.sets <- .getConformalPredSets(
+                p.cal = p.cal, p.test = p.query,
+                y.cal = y.cal, alpha = alpha
+            )
         }
-        else
-            pred.sets <- .getConformalPredSets(p.cal=p.cal, p.test=p.query,
-                                               y.cal=y.cal, alpha=alpha)
     }
 
-    if(resample.cal){
-        data <- resample.two(p.cal=p.cal, p.test=p.query, y.cal=y.cal,
-                             labels=labels)
-        if (follow.ontology){
-            pred.sets1 <- .getHierarchicalPredSets(p.cal=data$p.cal2,
-                                                   p.test=data$p.test1,
-                                                   y.cal=data$y.cal2,
-                                                   onto=onto,
-                                                   alpha=alpha,
-                                                   lambdas=lambdas)
-            pred.sets2 <- .getHierarchicalPredSets(p.cal=data$p.cal1,
-                                                   p.test=data$p.test2,
-                                                   y.cal=data$y.cal1,
-                                                   onto=onto,
-                                                   alpha=alpha,
-                                                   lambdas=lambdas)
+    if (resample.cal) {
+        data <- resample.two(
+            p.cal = p.cal, p.test = p.query, y.cal = y.cal,
+            labels = labels
+        )
+        if (follow.ontology) {
+            pred.sets1 <- .getHierarchicalPredSets(
+                p.cal = data$p.cal2,
+                p.test = data$p.test1,
+                y.cal = data$y.cal2,
+                onto = onto,
+                alpha = alpha,
+                lambdas = lambdas
+            )
+            pred.sets2 <- .getHierarchicalPredSets(
+                p.cal = data$p.cal1,
+                p.test = data$p.test2,
+                y.cal = data$y.cal1,
+                onto = onto,
+                alpha = alpha,
+                lambdas = lambdas
+            )
             pred.sets <- c(pred.sets1, pred.sets2)
-        }
-        else {
-            pred.sets1 <- .getConformalPredSets(p.cal=data$p.cal2,
-                                                p.test=data$p.test1,
-                                                y.cal=data$y.cal2,
-                                                alpha=alpha)
-            pred.sets2 <- .getConformalPredSets(p.cal=data$p.cal1,
-                                                p.test=data$p.test2,
-                                                y.cal=data$y.cal1,
-                                                alpha=alpha)
+        } else {
+            pred.sets1 <- .getConformalPredSets(
+                p.cal = data$p.cal2,
+                p.test = data$p.test1,
+                y.cal = data$y.cal2,
+                alpha = alpha
+            )
+            pred.sets2 <- .getConformalPredSets(
+                p.cal = data$p.cal1,
+                p.test = data$p.test2,
+                y.cal = data$y.cal1,
+                alpha = alpha
+            )
             pred.sets <- c(pred.sets1, pred.sets2)
         }
         # Order the prediction set
@@ -261,35 +277,30 @@ getPredictionSets <- function(x.query, x.cal, y.cal, onto=NULL, alpha = 0.1,
 
     # if not specified, return a sc object if the input was a sc object,
     # a matrix if the input was a matrix
-    if(is.null(return.sc) & sc)
+    if (is.null(return.sc) & sc) {
         return.sc <- TRUE
-    if(is.null(return.sc) & !sc)
+    }
+    if (is.null(return.sc) & !sc) {
         return.sc <- FALSE
-    if(return.sc){
+    }
+    if (return.sc) {
         colData(x.query)[[pr.name]] <- pred.sets
         return(x.query)
     }
-    if(!return.sc){
+    if (!return.sc) {
         return(pred.sets)
     }
-
 }
 
 ## function to retrieve prediction matrix from the colData of a
 ## SingleCellExperiment object
 
-.retrievePredMatrix <- function(sc){
+.retrievePredMatrix <- function(sc) {
     n.sc <- ncol(sc)
-    p.sc <- matrix(NA, nrow=n.sc, ncol=K)
+    p.sc <- matrix(NA, nrow = n.sc, ncol = K)
     colnames(p.sc) <- labels
-    for(i in labels){
-        p.sc[,i] <- colData(sc)[[i]]
+    for (i in labels) {
+        p.sc[, i] <- colData(sc)[[i]]
     }
     return(p.sc)
 }
-
-
-
-
-
-
diff --git a/R/old/ancestors.R b/R/old/ancestors.R
index 76af3a9..22312ab 100644
--- a/R/old/ancestors.R
+++ b/R/old/ancestors.R
@@ -8,9 +8,10 @@
 #' @importFrom igraph degree
 #' @importFrom igraph distances
 
-.ancestors <- function(node, onto, include_self=TRUE){
-    if(include_self)
-      return(V(onto)$name[is.finite(distances(onto, node, mode="in"))])
-    else
-      return(V(onto)$name[is.finite(distances(onto, node, mode="in")) & V(onto)$name!=node])
+.ancestors <- function(node, onto, include_self = TRUE) {
+    if (include_self) {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "in"))])
+    } else {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "in")) & V(onto)$name != node])
+    }
 }
diff --git a/R/old/children.R b/R/old/children.R
index e425946..5e69eee 100644
--- a/R/old/children.R
+++ b/R/old/children.R
@@ -10,9 +10,10 @@
 #' @importFrom igraph distances
 
 
-.children <- function(node, onto, leaf=TRUE){
-    if(leaf)
-      return(V(onto)$name[is.finite(distances(onto, node, mode="out")) & degree(onto, mode="out")==0])
-    else
-      return(V(onto)$name[is.finite(distances(onto, node, mode="out"))])
+.children <- function(node, onto, leaf = TRUE) {
+    if (leaf) {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "out")) & degree(onto, mode = "out") == 0])
+    } else {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "out"))])
+    }
 }
diff --git a/R/old/conformal.R b/R/old/conformal.R
index 58b553b..faf479a 100644
--- a/R/old/conformal.R
+++ b/R/old/conformal.R
@@ -1,80 +1,80 @@
 #### Function to get the conformal quantile
-getConfQuant <- function(p, label, alpha){
-  #p prediction matrix for data in the calibration set (ncal x num labels)
-  #label true label for data in the calibration set
-  #alpha desired error rate
-  true <- rep(NA, dim(p)[1])
-  for (i in 1:dim(p)[1])
-    true[i] <- p[i, label[i]]
-  
-  cal.scores <- 1-true
-  
-  # get adjusted quantile
-  n <- dim(p)[1]
-  q_level = ceiling((n+1)*(1-alpha))/n
-  qhat = quantile(cal.scores, q_level)
-  return(list(scores=cal.scores, qhat=qhat))
+getConfQuant <- function(p, label, alpha) {
+    # p prediction matrix for data in the calibration set (ncal x num labels)
+    # label true label for data in the calibration set
+    # alpha desired error rate
+    true <- rep(NA, dim(p)[1])
+    for (i in 1:dim(p)[1]) {
+        true[i] <- p[i, label[i]]
+    }
+
+    cal.scores <- 1 - true
+
+    # get adjusted quantile
+    n <- dim(p)[1]
+    q_level <- ceiling((n + 1) * (1 - alpha)) / n
+    qhat <- quantile(cal.scores, q_level)
+    return(list(scores = cal.scores, qhat = qhat))
 }
 
 #### Function to get the prediction sets for test data
-getPredSets <- function(pred, label, qhat, getClass=T, acc.p=NULL, summary=F){
-  #pred prediction matrix for data in the test set (ntest x num labels)
-  #label true label for data in the test set (needed to evaluate accuracy and coverage)
-  #qhat conformal quantile
-  
-  #get predicted class from prediction matrix
-  if (getClass)
-    acc.p <- apply(pred, 1, function(row) colnames(pred)[which.max(row)])
-  acc <- mean(acc.p==label)
-  prediction_sets <- pred >=(1-qhat)
-  rs <- rowSums(prediction_sets) # to have summary on size of prediction sets
-  
-  # Get prediction set colnames
-  pr.list <- lapply(1:nrow(prediction_sets), function(i) {
-    colnames(prediction_sets)[prediction_sets[i, ]]
-  })
-  
-  # Get coverage
-  tf <- rep(NA, length(pr.list))
-  for (i in 1:length(pr.list))
-    tf[i] <- label[i] %in% pr.list[[i]]
-  coverage <- sum(tf)/length(pr.list) 
-  
-  if (summary)
-    return(list(accuracy=acc, pred.sets=pr.list, coverage=coverage, summary=summary(rs)))
-  else
-    return(list(accuracy=acc, pred.sets=pr.list, coverage=coverage))
-}
+getPredSets <- function(pred, label, qhat, getClass = T, acc.p = NULL, summary = F) {
+    # pred prediction matrix for data in the test set (ntest x num labels)
+    # label true label for data in the test set (needed to evaluate accuracy and coverage)
+    # qhat conformal quantile
 
-#### Function to get class conditional coverage
-class_sp_conf <- function(classes, pred.cal, pred.test, labels.cal, labels.test){
-  s <- rep(NA, length(classes))
-  names(s) <- classes
-  
-  for (i in 1:length(classes)){
-    p <- as.matrix(pred.cal[labels.cal==classes[i],])
-    s[i] <- getConfQuant(p, rep(classes[i], nrow(p)), 0.05)$qhat
-  }
-  
-  acc.p <- apply(pred.test, 1, function(row) colnames(pred.test)[which.max(row)])
-  p.sets <- matrix(NA, nrow=nrow(pred.test), ncol=ncol(pred.test))
-  colnames(p.sets) <- colnames(pred.test)
-  for(i in 1:length(classes))
-    p.sets[,classes[i]] <- pred.test[,classes[i]] >= (1-s[classes[i]])
-  
-  pr.list <- lapply(1:nrow(p.sets), function(i) {
-    colnames(p.sets)[p.sets[i, ]]
-  })
-  
-  # Get coverage
-  tf <- rep(NA, length(pr.list))
-  for (i in 1:length(pr.list))
-    tf[i] <- labels.test[i] %in% pr.list[[i]]
-  return(list(pred.sets = pr.list, coverage=mean(tf)))
-}
+    # get predicted class from prediction matrix
+    if (getClass) {
+        acc.p <- apply(pred, 1, function(row) colnames(pred)[which.max(row)])
+    }
+    acc <- mean(acc.p == label)
+    prediction_sets <- pred >= (1 - qhat)
+    rs <- rowSums(prediction_sets) # to have summary on size of prediction sets
 
+    # Get prediction set colnames
+    pr.list <- lapply(1:nrow(prediction_sets), function(i) {
+        colnames(prediction_sets)[prediction_sets[i, ]]
+    })
 
+    # Get coverage
+    tf <- rep(NA, length(pr.list))
+    for (i in 1:length(pr.list)) {
+        tf[i] <- label[i] %in% pr.list[[i]]
+    }
+    coverage <- sum(tf) / length(pr.list)
 
+    if (summary) {
+        return(list(accuracy = acc, pred.sets = pr.list, coverage = coverage, summary = summary(rs)))
+    } else {
+        return(list(accuracy = acc, pred.sets = pr.list, coverage = coverage))
+    }
+}
+
+#### Function to get class conditional coverage
+class_sp_conf <- function(classes, pred.cal, pred.test, labels.cal, labels.test) {
+    s <- rep(NA, length(classes))
+    names(s) <- classes
 
+    for (i in 1:length(classes)) {
+        p <- as.matrix(pred.cal[labels.cal == classes[i], ])
+        s[i] <- getConfQuant(p, rep(classes[i], nrow(p)), 0.05)$qhat
+    }
 
+    acc.p <- apply(pred.test, 1, function(row) colnames(pred.test)[which.max(row)])
+    p.sets <- matrix(NA, nrow = nrow(pred.test), ncol = ncol(pred.test))
+    colnames(p.sets) <- colnames(pred.test)
+    for (i in 1:length(classes)) {
+        p.sets[, classes[i]] <- pred.test[, classes[i]] >= (1 - s[classes[i]])
+    }
 
+    pr.list <- lapply(1:nrow(p.sets), function(i) {
+        colnames(p.sets)[p.sets[i, ]]
+    })
+
+    # Get coverage
+    tf <- rep(NA, length(pr.list))
+    for (i in 1:length(pr.list)) {
+        tf[i] <- labels.test[i] %in% pr.list[[i]]
+    }
+    return(list(pred.sets = pr.list, coverage = mean(tf)))
+}
diff --git a/R/old/getHierarchicalPredSets.R b/R/old/getHierarchicalPredSets.R
index 1d316c3..0ba27e6 100644
--- a/R/old/getHierarchicalPredSets.R
+++ b/R/old/getHierarchicalPredSets.R
@@ -30,87 +30,93 @@
 # Angelopoulus (2023), Conformal Risk Control. Finally, builds prediction sets
 # for p.test based on the selected lambda value.
 
-.getHierarchicalPredSets <- function(p.cal, p.test, y.cal, onto, alpha, lambdas){
+.getHierarchicalPredSets <- function(p.cal, p.test, y.cal, onto, alpha, lambdas) {
     y.cal <- as.character(y.cal)
     # Get prediction sets for each value of lambda for all the calibration data
     j <- NULL
-    exportedFn = c(".predSets", ".scores", ".children", ".ancestors")
-    sets <- foreach(j = lambdas, .export=exportedFn) %dopar% {
-              lapply(1:nrow(p.cal),
-                function(i) .predSets(lambda=j, pred=p.cal[i, ], onto=onto))
-      }
+    exportedFn <- c(".predSets", ".scores", ".children", ".ancestors")
+    sets <- foreach(j = lambdas, .export = exportedFn) %dopar% {
+        lapply(
+            1:nrow(p.cal),
+            function(i) .predSets(lambda = j, pred = p.cal[i, ], onto = onto)
+        )
+    }
 
     # Get the loss table (ncal x length(lambda) table with TRUE\FALSE)
     loss <- sapply(1:length(lambdas), function(lambda) {
-      sapply(seq_along(y.cal), function(i) {
-        !(y.cal[i] %in% sets[[lambda]][[i]])
-      })
+        sapply(seq_along(y.cal), function(i) {
+            !(y.cal[i] %in% sets[[lambda]][[i]])
+        })
     })
 
     # Get lhat
     n <- nrow(loss)
     rhat <- colMeans(loss)
-    lhat_idx <- min(which(((n/(n+1)) * rhat + 1/(n+1) ) <= alpha))
+    lhat_idx <- min(which(((n / (n + 1)) * rhat + 1 / (n + 1)) <= alpha))
     lhat <- lambdas[lhat_idx]
 
 
     # Get prediction sets for test data
-    sets.test <- apply(p.test, 1, function(x) .predSets(lambda=lhat, pred=x, onto=onto))
+    sets.test <- apply(p.test, 1, function(x) .predSets(lambda = lhat, pred = x, onto = onto))
 
-    return(list(sets.test=sets.test, lhat=lhat))
+    return(list(sets.test = sets.test, lhat = lhat))
 }
 
 
-.ancestors <- function(node, onto, include_self=TRUE){
-  if(include_self)
-    return(V(onto)$name[is.finite(distances(onto, node, mode="in"))])
-  else
-    return(V(onto)$name[is.finite(distances(onto, node, mode="in")) & V(onto)$name!=node])
+.ancestors <- function(node, onto, include_self = TRUE) {
+    if (include_self) {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "in"))])
+    } else {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "in")) & V(onto)$name != node])
+    }
 }
 
-.children <- function(node, onto, leaf=TRUE){
-  if(leaf)
-    return(V(onto)$name[is.finite(distances(onto, node, mode="out")) & degree(onto, mode="out")==0])
-  else
-    return(V(onto)$name[is.finite(distances(onto, node, mode="out"))])
+.children <- function(node, onto, leaf = TRUE) {
+    if (leaf) {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "out")) & degree(onto, mode = "out") == 0])
+    } else {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "out"))])
+    }
 }
 
-.scores <- function(pred, int_node, onto){
-  c <- .children(node = int_node, onto = onto, leaf = TRUE)
-  return(sum(pred[c]))
+.scores <- function(pred, int_node, onto) {
+    c <- .children(node = int_node, onto = onto, leaf = TRUE)
+    return(sum(pred[c]))
 }
 
-.predSets <- function(lambda, pred, onto){
-  # Get the predicted class and its ancestors
-  pred_class <- names(pred)[which.max(pred)]
-  anc <- .ancestors(node = pred_class, onto = onto, include_self = TRUE)
-
-  # Compute scores for all the ancestor of the predicted class
-  s <- sapply(as.character(anc), function(i) .scores(pred=pred, int_node=i,
-                                                     onto=onto))
-  names(s) <- anc
+.predSets <- function(lambda, pred, onto) {
+    # Get the predicted class and its ancestors
+    pred_class <- names(pred)[which.max(pred)]
+    anc <- .ancestors(node = pred_class, onto = onto, include_self = TRUE)
+
+    # Compute scores for all the ancestor of the predicted class
+    s <- sapply(as.character(anc), function(i) {
+        .scores(
+            pred = pred, int_node = i,
+            onto = onto
+        )
+    })
+    names(s) <- anc
 
-  #Sort them by score and if there are ties by distance to the predicted class
-  ## compute distance from predicted class
-  pos <- distances(onto, v=anc, to=pred_class, mode="out")
-  tie_breaker <- as.vector(t(pos))
-  names(tie_breaker) <- colnames(t(pos))
-  sorted_indices <- order(s, tie_breaker, decreasing=FALSE)
-  sorted_scores <- s[sorted_indices]
+    # Sort them by score and if there are ties by distance to the predicted class
+    ## compute distance from predicted class
+    pos <- distances(onto, v = anc, to = pred_class, mode = "out")
+    tie_breaker <- as.vector(t(pos))
+    names(tie_breaker) <- colnames(t(pos))
+    sorted_indices <- order(s, tie_breaker, decreasing = FALSE)
+    sorted_scores <- s[sorted_indices]
 
-  # Select the first score that is geq than lambda
-  sel_node <- names(sorted_scores)[round(sorted_scores, 15) >= lambda][1]
+    # Select the first score that is geq than lambda
+    sel_node <- names(sorted_scores)[round(sorted_scores, 15) >= lambda][1]
 
 
-  # Add also the subgraphs we would have obtained with smaller lambda
-  selected <- c(lapply(anc[round(s, 15) < lambda], function(x)
-    .children(node = x, onto = onto)),
-    list(.children(sel_node, onto)))
+    # Add also the subgraphs we would have obtained with smaller lambda
+    selected <- c(
+        lapply(anc[round(s, 15) < lambda], function(x) {
+            .children(node = x, onto = onto)
+        }),
+        list(.children(sel_node, onto))
+    )
 
-  return(Reduce(union, selected))
+    return(Reduce(union, selected))
 }
-
-
-
-
-
diff --git a/R/old/predSets.R b/R/old/predSets.R
index 3e577c4..b041d50 100644
--- a/R/old/predSets.R
+++ b/R/old/predSets.R
@@ -6,22 +6,26 @@
 #' @author Daniela Corbetta
 #' @return vector with names of the selected leaf nodes
 
-.predSets <- function(lambda, pred, onto){
+.predSets <- function(lambda, pred, onto) {
     # Get the predicted class and its ancestors
     pred_class <- names(pred)[which.max(pred)]
     anc <- .ancestors(node = pred_class, onto = onto, include_self = TRUE)
 
     # Compute scores for all the ancestor of the predicted class
-    s <- sapply(as.character(anc), function(i) .scores(pred=pred, int_node=i,
-                                                       onto=onto))
+    s <- sapply(as.character(anc), function(i) {
+        .scores(
+            pred = pred, int_node = i,
+            onto = onto
+        )
+    })
     names(s) <- anc
 
-    #Sort them by score and if there are ties by distance to the predicted class
+    # Sort them by score and if there are ties by distance to the predicted class
     ## compute distance from predicted class
-    pos <- distances(onto, v=anc, to=pred_class, mode="out")
+    pos <- distances(onto, v = anc, to = pred_class, mode = "out")
     tie_breaker <- as.vector(t(pos))
     names(tie_breaker) <- colnames(t(pos))
-    sorted_indices <- order(s, tie_breaker, decreasing=FALSE)
+    sorted_indices <- order(s, tie_breaker, decreasing = FALSE)
     sorted_scores <- s[sorted_indices]
 
     # Select the first score that is geq than lambda
@@ -29,9 +33,12 @@
 
 
     # Add also the subgraphs we would have obtained with smaller lambda
-    selected <- c(lapply(anc[round(s, 15) < lambda], function(x)
-                      .children(node = x, onto = onto)),
-                  list(.children(sel_node, onto)))
+    selected <- c(
+        lapply(anc[round(s, 15) < lambda], function(x) {
+            .children(node = x, onto = onto)
+        }),
+        list(.children(sel_node, onto))
+    )
 
     return(Reduce(union, selected))
 }
diff --git a/R/old/scores.R b/R/old/scores.R
index 4f69d7d..7e8cc0b 100644
--- a/R/old/scores.R
+++ b/R/old/scores.R
@@ -8,7 +8,7 @@
 #' @author Daniela Corbetta
 #' @return returns the score
 
-.scores <- function(pred, int_node, onto){
+.scores <- function(pred, int_node, onto) {
     c <- .children(node = int_node, onto = onto, leaf = TRUE)
     return(sum(pred[c]))
 }
diff --git a/R/old/somegraphs.R b/R/old/somegraphs.R
index e9079a5..9219417 100644
--- a/R/old/somegraphs.R
+++ b/R/old/somegraphs.R
@@ -6,11 +6,13 @@
 # Build example graph
 #####################
 library(igraph)
-t <- graph_from_literal(animale-+cane:gatto:topo, gatto-+british:persiano,
-                         gatto-+retriever,
-                         cane-+cocker:retriever, retriever-+golden:labrador)
-plot(t, layout=layout_as_tree(t, root="animale"))
-p <- c(0.2, 0.3, 0.2, 0.10, 0.15,0.05)
+t <- graph_from_literal(
+    animale - +cane:gatto:topo, gatto - +british:persiano,
+    gatto - +retriever,
+    cane - +cocker:retriever, retriever - +golden:labrador
+)
+plot(t, layout = layout_as_tree(t, root = "animale"))
+p <- c(0.2, 0.3, 0.2, 0.10, 0.15, 0.05)
 names(p) <- c("cocker", "golden", "labrador", "british", "persiano", "topo")
 p
 
@@ -25,22 +27,22 @@ source("/Users/daniela/Documents/GitHub/ConfCell/R/old/utils_Hier.R")
 nam <- V(t)$name
 s <- sapply(nam, function(x) g(p, x, t))
 s1 <- sapply(nam, function(x) .scores(p, x, t))
-sum(s!=s1) # ok
+sum(s != s1) # ok
 
 # Check ancestors
 anc <- sapply(nam, function(x) ancestors(x, graph = t, include_self = T))
 anc1 <- sapply(nam, function(x) .ancestors(x, onto = t, include_self = T))
 tf <- rep(NA, length(anc))
-for(i in 1:length(anc)){
-  tf[i] <- mean(anc[[i]]==anc1[[i]])
+for (i in 1:length(anc)) {
+    tf[i] <- mean(anc[[i]] == anc1[[i]])
 }
 tf
 
 anc <- sapply(nam, function(x) ancestors(x, graph = t, include_self = F))
 anc1 <- sapply(nam, function(x) .ancestors(x, onto = t, include_self = F))
 tf <- rep(NA, length(anc))
-for(i in 1:length(anc)){
-  tf[i] <- mean(anc[[i]]==anc1[[i]])
+for (i in 1:length(anc)) {
+    tf[i] <- mean(anc[[i]] == anc1[[i]])
 }
 tf
 
@@ -48,42 +50,42 @@ tf
 child <- sapply(nam, function(x) children(x, graph = t, leaf = T))
 child1 <- sapply(nam, function(x) .children(x, onto = t, leaf = T))
 tf <- rep(NA, length(child))
-for(i in 1:length(child)){
-  tf[i] <- mean(child[[i]]==child1[[i]])
+for (i in 1:length(child)) {
+    tf[i] <- mean(child[[i]] == child1[[i]])
 }
 tf
 
 child <- sapply(nam, function(x) children(x, graph = t, leaf = F))
 child1 <- sapply(nam, function(x) .children(x, onto = t, leaf = F))
 tf <- rep(NA, length(child))
-for(i in 1:length(child)){
-  tf[i] <- mean(child[[i]]==child1[[i]])
+for (i in 1:length(child)) {
+    tf[i] <- mean(child[[i]] == child1[[i]])
 }
 tf
 
 # .predSets
 pred_sets1(0.5, p, t)
-.predSets(lambda=0.5, pred=p, onto=t)
+.predSets(lambda = 0.5, pred = p, onto = t)
 
 pred_sets1(0.7, p, t)
-.predSets(lambda=0.7, pred=p, onto=t)
+.predSets(lambda = 0.7, pred = p, onto = t)
 
 pred_sets1(0.75, p, t)
-.predSets(lambda=0.75, pred=p, onto=t)
+.predSets(lambda = 0.75, pred = p, onto = t)
 
-pred_sets1(0.8, p, t) #should be all
-.predSets(lambda=0.8, pred=p, onto=t)
+pred_sets1(0.8, p, t) # should be all
+.predSets(lambda = 0.8, pred = p, onto = t)
 
 pred_sets1(1, p, t)
-.predSets(lambda=1, pred=p, onto=t)
+.predSets(lambda = 1, pred = p, onto = t)
 
 pred_sets1(0.6, p, t) # cane (golden, labrador, cocker)
-.predSets(lambda=0.6, pred=p, onto=t)
+.predSets(lambda = 0.6, pred = p, onto = t)
 
 p1 <- c(0.3, 0.4, 0.3, 0, 0, 0)
 names(p1) <- c("cocker", "golden", "labrador", "british", "persiano", "topo")
 pred_sets1(0.7, p1, t)
-.predSets(lambda=0.7, pred=p1, onto=t)
+.predSets(lambda = 0.7, pred = p1, onto = t)
 
 #########################################
 ### Create a random dataset
@@ -91,31 +93,33 @@ pred_sets1(0.7, p1, t)
 set.seed(1010)
 n <- 2100
 
-leaves <- c(4,6,7,8,9)
+leaves <- c(4, 6, 7, 8, 9)
 nclasses <- length(leaves)
-lab <- sample(leaves, n, replace=T)
-x <- (runif(n,0,3.1) + lab)
+lab <- sample(leaves, n, replace = T)
+x <- (runif(n, 0, 3.1) + lab)
 y <- rep(NA, length(lab))
-y[lab==4] <- "cocker"
-y[lab==6] <- "golden"
-y[lab==7] <- "labrador"
-y[lab==8] <- "british"
-y[lab==9] <- "persiano"
-xydf <- data.frame(y=as.factor(y), x=x)
+y[lab == 4] <- "cocker"
+y[lab == 6] <- "golden"
+y[lab == 7] <- "labrador"
+y[lab == 8] <- "british"
+y[lab == 9] <- "persiano"
+xydf <- data.frame(y = as.factor(y), x = x)
 head(xydf)
 
 # Divide in train, cal and test and fit model
 library(VGAM)
-train <- xydf[1:1000,]
-cal <- xydf[1001:1100,]
-test <- xydf[1101:2100,]
+train <- xydf[1:1000, ]
+cal <- xydf[1001:1100, ]
+test <- xydf[1101:2100, ]
 
 
-fit <- vglm(y~x, family = multinomial(refLevel = "cocker"),
-            data = train)
-lambdas <- seq(0.001,0.999,length.out=100)
+fit <- vglm(y ~ x,
+    family = multinomial(refLevel = "cocker"),
+    data = train
+)
+lambdas <- seq(0.001, 0.999, length.out = 100)
 # Compute predicted probabilities for calibration points
-p.cal <- predict(fit, newdata=cal, type="response")
+p.cal <- predict(fit, newdata = cal, type = "response")
 library(doParallel)
 library(foreach)
 #
@@ -123,16 +127,17 @@ num_cores <- 4
 cl <- makeCluster(num_cores)
 registerDoParallel(cl)
 
-t3 <- graph_from_literal(animale-+cane:gatto, gatto-+british:persiano,
-                         gatto-+retriever,
-                         cane-+cocker:retriever, retriever-+golden:labrador)
-plot(t3, layout=layout_as_tree(t3, root="animale"))
-exportedFn = c(".predSets", ".scores", ".children", ".ancestors")
-system.time(sets1 <- foreach(lambda = lambdas, .packages = c("ConfCell", "igraph")) %dopar% {
-  library(igraph)
-  lapply(1:nrow(p.cal), function(i) pred_sets1(lambda, p.cal[i, ], t3))
-}
+t3 <- graph_from_literal(
+    animale - +cane:gatto, gatto - +british:persiano,
+    gatto - +retriever,
+    cane - +cocker:retriever, retriever - +golden:labrador
 )
+plot(t3, layout = layout_as_tree(t3, root = "animale"))
+exportedFn <- c(".predSets", ".scores", ".children", ".ancestors")
+system.time(sets1 <- foreach(lambda = lambdas, .packages = c("ConfCell", "igraph")) %dopar% {
+    library(igraph)
+    lapply(1:nrow(p.cal), function(i) pred_sets1(lambda, p.cal[i, ], t3))
+})
 
 #
 system.time(l1 <- get_loss_table_mis(lambdas, sets1, as.character(cal$y), t3))
@@ -140,9 +145,9 @@ Rhat <- colMeans(l1)
 plot(Rhat)
 all(diff(Rhat) <= 0)
 
-lhat <- get_lhat(l1,lambdas,0.1)
-p.test <- predict(fit, newdata=test, type="response")
-sets.test <- apply(p.test, 1, function(x) pred_sets1(lambda=lhat, x, t3))
+lhat <- get_lhat(l1, lambdas, 0.1)
+p.test <- predict(fit, newdata = test, type = "response")
+sets.test <- apply(p.test, 1, function(x) pred_sets1(lambda = lhat, x, t3))
 
 t <- .getHierarchicalPredSets(p.cal, p.test, cal$y, onto = t3, alpha = 0.1, lambdas = lambdas)
 t1 <- .getConformalPredSets(p.cal, p.test, as.character(cal$y), 0.1)
@@ -154,11 +159,13 @@ l.crc <- sapply(t$sets.test, length)
 barplot(table(l.std))
 barplot(table(l.crc))
 cvg <- cvg1 <- rep(NA, length(t))
-for(i in 1:length(t1))
-  cvg[i] <- test$y[i] %in% t1[[i]]
+for (i in 1:length(t1)) {
+    cvg[i] <- test$y[i] %in% t1[[i]]
+}
 
-for(i in 1:length(t$sets.test))
-  cvg1[i] <- test$y[i] %in% t$sets.test[[i]]
+for (i in 1:length(t$sets.test)) {
+    cvg1[i] <- test$y[i] %in% t$sets.test[[i]]
+}
 mean(cvg)
 mean(cvg1)
 
@@ -166,9 +173,10 @@ mean(cvg1)
 
 
 
-sets <- foreach(j = lambdas, .export=exportedFn) %dopar% {
-  library(igraph)
-  lapply(1:nrow(p.cal),
-         function(i) .predSets(lambda=j, pred=p.cal[i, ], onto=t3))
+sets <- foreach(j = lambdas, .export = exportedFn) %dopar% {
+    library(igraph)
+    lapply(
+        1:nrow(p.cal),
+        function(i) .predSets(lambda = j, pred = p.cal[i, ], onto = t3)
+    )
 }
-
diff --git a/R/old/utils_Hier.R b/R/old/utils_Hier.R
index c02a84b..28be1e2 100644
--- a/R/old/utils_Hier.R
+++ b/R/old/utils_Hier.R
@@ -4,32 +4,34 @@
 
 
 # Conformal risk control
-# Function to retrieve children nodes of a given interior node. 
+# Function to retrieve children nodes of a given interior node.
 # If leaf=T gives leaf nodes, else gives all descendants
-children <- function(node, graph, leaf=T){
-  require(igraph)
-  if(leaf)
-    return(V(graph)$name[is.finite(distances(graph, node, mode="out")) & igraph::degree(graph, mode="out")==0])
-  else
-    return(V(graph)$name[is.finite(distances(graph, node, mode="out"))])
+children <- function(node, graph, leaf = T) {
+    require(igraph)
+    if (leaf) {
+        return(V(graph)$name[is.finite(distances(graph, node, mode = "out")) & igraph::degree(graph, mode = "out") == 0])
+    } else {
+        return(V(graph)$name[is.finite(distances(graph, node, mode = "out"))])
+    }
 }
 
 # Function to retrieve ancestors of a given node
-ancestors <- function(node, graph, include_self=T){
-  require(igraph)
-  if(include_self)
-    return(V(graph)$name[is.finite(distances(graph, node, mode="in"))])
-  else
-    return(V(graph)$name[is.finite(distances(graph, node, mode="in")) & V(graph)$name!=node])
+ancestors <- function(node, graph, include_self = T) {
+    require(igraph)
+    if (include_self) {
+        return(V(graph)$name[is.finite(distances(graph, node, mode = "in"))])
+    } else {
+        return(V(graph)$name[is.finite(distances(graph, node, mode = "in")) & V(graph)$name != node])
+    }
 }
 
-# Define function to compute g(a,x), where a is a node 
+# Define function to compute g(a,x), where a is a node
 # (g(a,x)=sum_{y in P(a)}(pi(y)))
 # @param pred named vector with predicted probabilities for one observation to be in one of the leaves class
 
-g <- function(pred, int_node, graph){
-  c <- children(node = int_node, graph = graph, leaf = T)
-  return(sum(pred[c]))
+g <- function(pred, int_node, graph) {
+    c <- children(node = int_node, graph = graph, leaf = T)
+    return(sum(pred[c]))
 }
 
 # Function to construct confidence sets (for one obs for now)
@@ -37,84 +39,86 @@ g <- function(pred, int_node, graph){
 # @param p predicted probabilities for leaf nodes
 # @param graph graph
 
-pred_sets1 <- function(lambda, p, graph){
-  pred_class <- names(p)[which.max(p)]
-  anc <- ancestors(node = pred_class, graph = graph, include_self = TRUE)
-  
-  # Obtain scores for ancestors of the predicted class
-  scores <- sapply(as.character(anc), function(i) g(p, i, graph))
-  names(scores) <- anc
-  
-  # Sort them by score and if there are ties by distance to the predicted class
-  ## compute distance from predicted class
-  pos <- distances(graph, v=anc, to=pred_class, mode="out")
-  tie_breaker <- as.vector(t(pos))
-  names(tie_breaker) <- colnames(t(pos))
-  sorted_indices <- order(scores, tie_breaker, decreasing=F)
-  sorted_scores <- scores[sorted_indices]
-  
-  # Select the first score that is geq than lambda
-  sel_node <- names(sorted_scores)[round(sorted_scores, 15) >= lambda][1]
-  
-  # c(lapply(anc[round(scores, 15) <= lambda]
-  selected <- c(lapply(anc[round(scores, 15) < lambda], function(x) children(node = x, graph = graph)),
-                list(children(sel_node, graph)))
-  
-  return(Reduce(union, selected))
+pred_sets1 <- function(lambda, p, graph) {
+    pred_class <- names(p)[which.max(p)]
+    anc <- ancestors(node = pred_class, graph = graph, include_self = TRUE)
+
+    # Obtain scores for ancestors of the predicted class
+    scores <- sapply(as.character(anc), function(i) g(p, i, graph))
+    names(scores) <- anc
+
+    # Sort them by score and if there are ties by distance to the predicted class
+    ## compute distance from predicted class
+    pos <- distances(graph, v = anc, to = pred_class, mode = "out")
+    tie_breaker <- as.vector(t(pos))
+    names(tie_breaker) <- colnames(t(pos))
+    sorted_indices <- order(scores, tie_breaker, decreasing = F)
+    sorted_scores <- scores[sorted_indices]
+
+    # Select the first score that is geq than lambda
+    sel_node <- names(sorted_scores)[round(sorted_scores, 15) >= lambda][1]
+
+    # c(lapply(anc[round(scores, 15) <= lambda]
+    selected <- c(
+        lapply(anc[round(scores, 15) < lambda], function(x) children(node = x, graph = graph)),
+        list(children(sel_node, graph))
+    )
+
+    return(Reduce(union, selected))
 }
 
 # Do not consider this
-pred_sets2 <- function(lambda, p, graph){
-  pred_class <- names(p)[which.max(p)]
-  anc <- ancestors(node = pred_class, graph = graph, include_self = TRUE)
-  
-  scores <- sapply(anc, function(i) g(p, i, graph))
-  names(scores) <- anc
-  
-  selected <- lapply(anc[round(scores, 15) <= lambda], function(x) children(node = x, graph = graph))
-  if(length(selected)==0) selected <- pred_class
-  
-  return(Reduce(union, selected))
+pred_sets2 <- function(lambda, p, graph) {
+    pred_class <- names(p)[which.max(p)]
+    anc <- ancestors(node = pred_class, graph = graph, include_self = TRUE)
+
+    scores <- sapply(anc, function(i) g(p, i, graph))
+    names(scores) <- anc
+
+    selected <- lapply(anc[round(scores, 15) <= lambda], function(x) children(node = x, graph = graph))
+    if (length(selected) == 0) selected <- pred_class
+
+    return(Reduce(union, selected))
 }
 
 # Loss table with miscoverage
 get_loss_table_mis <- function(lambdas, prediction, ycal, graph) {
-  loss <- sapply(1:length(lambdas), function(lambda) {
-    sapply(seq_along(ycal), function(i) {
-      !(ycal[i] %in% prediction[[lambda]][[i]])
+    loss <- sapply(1:length(lambdas), function(lambda) {
+        sapply(seq_along(ycal), function(i) {
+            !(ycal[i] %in% prediction[[lambda]][[i]])
+        })
     })
-  })
-  
-  return(loss) 
+
+    return(loss)
 }
 
 # Hierarchical loss proposed by Angelopoulus and Bates
-hier_loss <- function(set, true_class, graph){
-  anc <- ancestors(true_class, graph)
-  set_distances <- distances(graph, v = anc, to = set)
-  
-  vroot <- V(graph)[degree(graph, mode = "in") == 0]
-  depth <- max(distances(graph, vroot, mode = "out"))
-  
-  return(min(set_distances) / depth)
+hier_loss <- function(set, true_class, graph) {
+    anc <- ancestors(true_class, graph)
+    set_distances <- distances(graph, v = anc, to = set)
+
+    vroot <- V(graph)[degree(graph, mode = "in") == 0]
+    depth <- max(distances(graph, vroot, mode = "out"))
+
+    return(min(set_distances) / depth)
 }
 
 # Loss table with hierarchical loss
 get_loss_table <- function(lambdas, prediction, ycal, graph) {
-  loss <- sapply(1:length(lambdas), function(lambda) {
-    sapply(seq_along(ycal), function(i) {
-      hier_loss(prediction[[lambda]][[i]], true_class = ycal[i], graph = graph)
+    loss <- sapply(1:length(lambdas), function(lambda) {
+        sapply(seq_along(ycal), function(i) {
+            hier_loss(prediction[[lambda]][[i]], true_class = ycal[i], graph = graph)
+        })
     })
-  })
-  
-  return(loss)  
+
+    return(loss)
 }
 
 # Find lambda hat on a grid of lambda values
-get_lhat <- function(calib_loss_table, lambdas, alpha, B=1) {
-  n <- nrow(calib_loss_table)
-  rhat <- colMeans(calib_loss_table)
-  lhat_idx <- min(which(((n/(n+1)) * rhat + B/(n+1) ) <= alpha))
-  # Return the corresponding lambda value at lhat_idx
-  return(lambdas[lhat_idx])
+get_lhat <- function(calib_loss_table, lambdas, alpha, B = 1) {
+    n <- nrow(calib_loss_table)
+    rhat <- colMeans(calib_loss_table)
+    lhat_idx <- min(which(((n / (n + 1)) * rhat + B / (n + 1)) <= alpha))
+    # Return the corresponding lambda value at lhat_idx
+    return(lambdas[lhat_idx])
 }
diff --git a/R/old/utils_Resample.R b/R/old/utils_Resample.R
index d975aa7..0736a4a 100644
--- a/R/old/utils_Resample.R
+++ b/R/old/utils_Resample.R
@@ -1,36 +1,38 @@
 ########## Utils for resampling strategy
 
-resample.two <- function(cal, test, cal.pred, test.pred, seed=NA){
-  if(!is.na(seed)) set.seed(seed)
-  s <- sample(1:nrow(test), round(nrow(test)/2))
-  test1 <- test[s,]
-  test2 <- test[-s,]
-  p.test1 <- test.pred[s,]
-  p.test2 <- test.pred[-s,]
-  cal_freq <- prop.table(table(cal$Y))
-  pr.class1 <- apply(p.test1, 1, function(row) colnames(p.test1)[which.max(row)])
-  pr.class2 <- apply(p.test2, 1, function(row) colnames(p.test2)[which.max(row)])
-  test_freq1 <- prop.table(table(pr.class1))
-  test_freq2 <- prop.table(table(pr.class2))
-  des_freq1 <- round(test_freq1 * length(cal$Y))
-  des_freq2 <- round(test_freq2 * length(cal$Y))
-  
-  idx1 <- idx2 <- NULL
-  for (i in cl.types) {
-    cat <- which(cal$Y == i)
-    if(!is.na(des_freq1[i])){
-      idx_cat1 <- sample(cat, size = des_freq1[i], replace = TRUE)
-      idx1 <- c(idx1, idx_cat1)
+resample.two <- function(cal, test, cal.pred, test.pred, seed = NA) {
+    if (!is.na(seed)) set.seed(seed)
+    s <- sample(1:nrow(test), round(nrow(test) / 2))
+    test1 <- test[s, ]
+    test2 <- test[-s, ]
+    p.test1 <- test.pred[s, ]
+    p.test2 <- test.pred[-s, ]
+    cal_freq <- prop.table(table(cal$Y))
+    pr.class1 <- apply(p.test1, 1, function(row) colnames(p.test1)[which.max(row)])
+    pr.class2 <- apply(p.test2, 1, function(row) colnames(p.test2)[which.max(row)])
+    test_freq1 <- prop.table(table(pr.class1))
+    test_freq2 <- prop.table(table(pr.class2))
+    des_freq1 <- round(test_freq1 * length(cal$Y))
+    des_freq2 <- round(test_freq2 * length(cal$Y))
+
+    idx1 <- idx2 <- NULL
+    for (i in cl.types) {
+        cat <- which(cal$Y == i)
+        if (!is.na(des_freq1[i])) {
+            idx_cat1 <- sample(cat, size = des_freq1[i], replace = TRUE)
+            idx1 <- c(idx1, idx_cat1)
+        }
+        if (!is.na(des_freq2[i])) {
+            idx_cat2 <- sample(cat, size = des_freq2[i], replace = TRUE)
+            idx2 <- c(idx2, idx_cat2)
+        }
     }
-    if(!is.na(des_freq2[i])){
-      idx_cat2 <- sample(cat, size = des_freq2[i], replace = TRUE)
-      idx2 <- c(idx2, idx_cat2)
-    }
-  }
-  
-  return(list(cal1=cal[idx1,], cal2=cal[idx2,], p.cal1=cal.pred[idx1,],
-              p.cal2=cal.pred[idx2,], test1=test1, test2=test2,
-              p.test1=p.test1, p.test2=p.test2))
+
+    return(list(
+        cal1 = cal[idx1, ], cal2 = cal[idx2, ], p.cal1 = cal.pred[idx1, ],
+        p.cal2 = cal.pred[idx2, ], test1 = test1, test2 = test2,
+        p.test1 = p.test1, p.test2 = p.test2
+    ))
 }
 
 # resample.oracle <- function(cal, test, cal.pred, test.pred, seed=NA){
@@ -45,7 +47,7 @@ resample.two <- function(cal, test, cal.pred, test.pred, seed=NA){
 #   test_freq2 <- prop.table(table(test2$Y))
 #   des_freq1 <- round(test_freq1 * length(cal$Y))
 #   des_freq2 <- round(test_freq2 * length(cal$Y))
-#   
+#
 #   idx1 <- idx2 <- NULL
 #   for (i in cl.types) {
 #     cat <- which(cal$Y == i)
@@ -58,120 +60,112 @@ resample.two <- function(cal, test, cal.pred, test.pred, seed=NA){
 #       idx2 <- c(idx2, idx_cat2)
 #     }
 #   }
-#   
+#
 #   return(list(cal1=cal[idx1,], cal2=cal[idx2,], p.cal1=cal.pred[idx1,],
 #               p.cal2=cal.pred[idx2,], test1=test1, test2=test2,
 #               p.test1=p.test1, p.test2=p.test2))
 # }
 
-resample.oracle <- function(cal, test, cal.pred, test.pred, seed=NA){
-  if(!is.na(seed)) set.seed(seed)
- 
-  cal_freq <- prop.table(table(cal$Y))
-  test_freq <- prop.table(table(test$Y))
-  des_freq <- round(test_freq * length(cal$Y))
-  
-  idx <- NULL
-  for (i in cl.types) {
-    cat <- which(cal$Y == i)
-    if(!is.na(des_freq[i])){
-      idx_cat <- sample(cat, size = des_freq[i], replace = TRUE)
-      idx <- c(idx, idx_cat)
-    }
-  }
-  
-  return(list(cal=cal[idx,], p.cal=cal.pred[idx,]))
-}
-
-resample.freq <- function(cal, test, cal.pred, test.pred, freqs, seed=NA){
-  if(!is.na(seed)) set.seed(seed)
-  
-  cal_freq <- prop.table(table(cal$Y))
-  test_freq <- freqs
-  des_freq <- round(test_freq * length(cal$Y))
-  
-  idx <- NULL
-  for (i in cl.types) {
-    cat <- which(cal$Y == i)
-    if(!is.na(des_freq[i])){
-      idx_cat <- sample(cat, size = des_freq[i], replace = TRUE)
-      idx <- c(idx, idx_cat)
-    }
-  }
-  
-  return(list(cal=cal[idx,], p.cal=cal.pred[idx,]))
-}
+resample.oracle <- function(cal, test, cal.pred, test.pred, seed = NA) {
+    if (!is.na(seed)) set.seed(seed)
 
+    cal_freq <- prop.table(table(cal$Y))
+    test_freq <- prop.table(table(test$Y))
+    des_freq <- round(test_freq * length(cal$Y))
 
-
-
-
-exportedFn <- c("pred_sets1", "children", "ancestors", "g")
-conformal <- function(cal, test, p.cal, p.test, alpha=0.1, graph=opi2,
-                      lambdas=seq(0.001,0.999,length.out=100),
-                      expFn=exportedFn){
-  conf.mln <- getConfQuant(p.cal, cal$Y, alpha)
-  pred.mln <- getPredSets(p.test, test$Y, conf.mln$qhat, 
-                              summary=T)
-  tf.conf <- rep(NA, length(pred.mln$pred.sets))
-  for (i in 1:length(pred.mln$pred.sets))
-    tf.conf[i] <- test$Y[i] %in% pred.mln$pred.sets[[i]]
-  
-  sets <- foreach(lambda = lambdas, .export = expFn) %dopar% {
-    lapply(1:nrow(p.cal), 
-           function(i) pred_sets1(lambda, p.cal[i, ], graph))}
-  l <- get_loss_table_mis(lambdas, sets, as.character(cal$Y), graph)
-  lhat <- get_lhat(l, lambdas, alpha)
-  sets.test <- apply(p.test, 1, function(x) pred_sets1(lhat, x, graph))
-  tf.crc <- rep(NA, nrow(test))
-  for (j in 1:nrow(test))
-    tf.crc[j] <- as.character(test$Y)[j] %in% sets.test[[j]]
-  return(list(tf.conf=tf.conf, tf.crc=tf.crc, sets.conf=pred.mln$pred.sets, sets.crc=sets.test))
-  
-}
-
-conf.loo <- function(p.cal, p.test, Ycal, Ytest, alpha){
-  ntest <- nrow(p.test)
-  emp.cov <- rep(NA, ntest)
-  test.sets <- list()
-  for(i in 1:ntest){
-    p.test.loo <- p.test[-i,]
-    cal_freq <- prop.table(table(Ycal))
-    test_prfreq <- apply(p.test.loo, 1, function(row) colnames(p.test.loo)[which.max(row)])
-    test_freq <- prop.table(table(test_prfreq))
-    des_freq <- round(test_freq * length(Ycal))
-  
-    # Resample
     idx <- NULL
-    for (j in cl.types) {
-      cat <- which(Ycal == j)
-      if(!is.na(des_freq[j])){
-        idx_cat <- sample(cat, size = des_freq[j], replace = TRUE)
-        idx <- c(idx, idx_cat)
-      }
+    for (i in cl.types) {
+        cat <- which(cal$Y == i)
+        if (!is.na(des_freq[i])) {
+            idx_cat <- sample(cat, size = des_freq[i], replace = TRUE)
+            idx <- c(idx, idx_cat)
+        }
     }
-    p.cal.loo <- p.cal[idx,]
-    Ycal.loo <- Ycal[idx]
-    
-    # Conformal prediction for i obs
-    q <- getConfQuant(p.cal.loo, Ycal.loo, alpha)
-    p <- getPredSets(t(as.matrix(p.test[i,])), Ytest[i], q$qhat)
-    emp.cov[i] <- p$coverage
-    test.sets[i] <- p$pred.sets
-    #if(i%%100 == 0) cat(i)
-  }
-  return(list(emp.cov=mean(emp.cov), sets=test.sets))
-}
-
 
+    return(list(cal = cal[idx, ], p.cal = cal.pred[idx, ]))
+}
 
+resample.freq <- function(cal, test, cal.pred, test.pred, freqs, seed = NA) {
+    if (!is.na(seed)) set.seed(seed)
 
+    cal_freq <- prop.table(table(cal$Y))
+    test_freq <- freqs
+    des_freq <- round(test_freq * length(cal$Y))
 
+    idx <- NULL
+    for (i in cl.types) {
+        cat <- which(cal$Y == i)
+        if (!is.na(des_freq[i])) {
+            idx_cat <- sample(cat, size = des_freq[i], replace = TRUE)
+            idx <- c(idx, idx_cat)
+        }
+    }
 
+    return(list(cal = cal[idx, ], p.cal = cal.pred[idx, ]))
+}
 
 
 
 
 
+exportedFn <- c("pred_sets1", "children", "ancestors", "g")
+conformal <- function(cal, test, p.cal, p.test, alpha = 0.1, graph = opi2,
+    lambdas = seq(0.001, 0.999, length.out = 100),
+    expFn = exportedFn) {
+    conf.mln <- getConfQuant(p.cal, cal$Y, alpha)
+    pred.mln <- getPredSets(p.test, test$Y, conf.mln$qhat,
+        summary = T
+    )
+    tf.conf <- rep(NA, length(pred.mln$pred.sets))
+    for (i in 1:length(pred.mln$pred.sets)) {
+        tf.conf[i] <- test$Y[i] %in% pred.mln$pred.sets[[i]]
+    }
 
+    sets <- foreach(lambda = lambdas, .export = expFn) %dopar% {
+        lapply(
+            1:nrow(p.cal),
+            function(i) pred_sets1(lambda, p.cal[i, ], graph)
+        )
+    }
+    l <- get_loss_table_mis(lambdas, sets, as.character(cal$Y), graph)
+    lhat <- get_lhat(l, lambdas, alpha)
+    sets.test <- apply(p.test, 1, function(x) pred_sets1(lhat, x, graph))
+    tf.crc <- rep(NA, nrow(test))
+    for (j in 1:nrow(test)) {
+        tf.crc[j] <- as.character(test$Y)[j] %in% sets.test[[j]]
+    }
+    return(list(tf.conf = tf.conf, tf.crc = tf.crc, sets.conf = pred.mln$pred.sets, sets.crc = sets.test))
+}
 
+conf.loo <- function(p.cal, p.test, Ycal, Ytest, alpha) {
+    ntest <- nrow(p.test)
+    emp.cov <- rep(NA, ntest)
+    test.sets <- list()
+    for (i in 1:ntest) {
+        p.test.loo <- p.test[-i, ]
+        cal_freq <- prop.table(table(Ycal))
+        test_prfreq <- apply(p.test.loo, 1, function(row) colnames(p.test.loo)[which.max(row)])
+        test_freq <- prop.table(table(test_prfreq))
+        des_freq <- round(test_freq * length(Ycal))
+
+        # Resample
+        idx <- NULL
+        for (j in cl.types) {
+            cat <- which(Ycal == j)
+            if (!is.na(des_freq[j])) {
+                idx_cat <- sample(cat, size = des_freq[j], replace = TRUE)
+                idx <- c(idx, idx_cat)
+            }
+        }
+        p.cal.loo <- p.cal[idx, ]
+        Ycal.loo <- Ycal[idx]
+
+        # Conformal prediction for i obs
+        q <- getConfQuant(p.cal.loo, Ycal.loo, alpha)
+        p <- getPredSets(t(as.matrix(p.test[i, ])), Ytest[i], q$qhat)
+        emp.cov[i] <- p$coverage
+        test.sets[i] <- p$pred.sets
+        # if(i%%100 == 0) cat(i)
+    }
+    return(list(emp.cov = mean(emp.cov), sets = test.sets))
+}
diff --git a/R/utils_ConformalPS.R b/R/utils_ConformalPS.R
index d9828cf..7d6a605 100644
--- a/R/utils_ConformalPS.R
+++ b/R/utils_ConformalPS.R
@@ -1,26 +1,22 @@
 # Function to get conformal prediction sets with split conformal inference
 
-.getConformalPredSets <- function(p.cal, p.test, y.cal, alpha){
+.getConformalPredSets <- function(p.cal, p.test, y.cal, alpha) {
     # Get calibration scores (1-predicted probability for the true class)
     true <- rep(NA, dim(p.cal)[1])
-    for (i in 1:dim(p.cal)[1])
-      true[i] <- p.cal[i, y.cal[i]]
-    s <- 1-true
+    for (i in 1:dim(p.cal)[1]) {
+        true[i] <- p.cal[i, y.cal[i]]
+    }
+    s <- 1 - true
 
     # Get adjusted quantile
     n <- nrow(p.cal)
-    q_level <- ceiling((n+1)*(1-alpha))/n
+    q_level <- ceiling((n + 1) * (1 - alpha)) / n
     qhat <- quantile(s, q_level)
 
     # Get prediction sets
-    prediction_sets <- p.test >= 1-qhat
+    prediction_sets <- p.test >= 1 - qhat
     pr.list <- lapply(1:nrow(prediction_sets), function(i) {
         colnames(prediction_sets)[prediction_sets[i, ]]
     })
     return(pr.list)
 }
-
-
-
-
-
diff --git a/R/utils_HierarchicalPS.R b/R/utils_HierarchicalPS.R
index 6b7c84c..4dd5248 100644
--- a/R/utils_HierarchicalPS.R
+++ b/R/utils_HierarchicalPS.R
@@ -7,81 +7,94 @@
 # for p.test based on the selected lambda value.
 
 .getHierarchicalPredSets <- function(p.cal, p.test, y.cal, onto, alpha,
-                                     lambdas){
+    lambdas) {
     y.cal <- as.character(y.cal)
     # Get prediction sets for each value of lambda for all the calibration data
     j <- NULL
-    exportedFn = c(".predSets", ".scores", ".children", ".ancestors")
-    sets <- foreach(j = lambdas, .export=exportedFn) %dopar% {
-              lapply(1:nrow(p.cal),
-                function(i) .predSets(lambda=j, pred=p.cal[i, ], onto=onto))
-      }
+    exportedFn <- c(".predSets", ".scores", ".children", ".ancestors")
+    sets <- foreach(j = lambdas, .export = exportedFn) %dopar% {
+        lapply(
+            1:nrow(p.cal),
+            function(i) .predSets(lambda = j, pred = p.cal[i, ], onto = onto)
+        )
+    }
 
     # Get the loss table (ncal x length(lambda) table with TRUE\FALSE)
     loss <- sapply(1:length(lambdas), function(lambda) {
-      sapply(seq_along(y.cal), function(i) {
-        !(y.cal[i] %in% sets[[lambda]][[i]])
-      })
+        sapply(seq_along(y.cal), function(i) {
+            !(y.cal[i] %in% sets[[lambda]][[i]])
+        })
     })
 
     # Get lhat
     n <- nrow(loss)
     rhat <- colMeans(loss)
-    lhat_idx <- min(which(((n/(n+1)) * rhat + 1/(n+1) ) <= alpha))
+    lhat_idx <- min(which(((n / (n + 1)) * rhat + 1 / (n + 1)) <= alpha))
     lhat <- lambdas[lhat_idx]
 
 
     # Get prediction sets for test data
-    sets.test <- apply(p.test, 1, function(x) .predSets(lambda=lhat, pred=x,
-                                                        onto=onto))
+    sets.test <- apply(p.test, 1, function(x) {
+        .predSets(
+            lambda = lhat, pred = x,
+            onto = onto
+        )
+    })
 
     return(sets.test)
 }
 
 # Function to retrieve all ancestors of a given node, starting from an igraph
 # object. Default includes also the given node
-.ancestors <- function(node, onto, include_self=TRUE){
-    if(include_self)
-        return(V(onto)$name[is.finite(distances(onto, node, mode="in"))])
-    else
-        return(V(onto)$name[is.finite(distances(onto, node, mode="in")) &
-                             V(onto)$name!=node])
+.ancestors <- function(node, onto, include_self = TRUE) {
+    if (include_self) {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "in"))])
+    } else {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "in")) &
+            V(onto)$name != node])
+    }
 }
 
 # Function to retrieve children of a given node, starting from an igraph object.
 # By default, gives only the children that are leaves in the ontology.
-.children <- function(node, onto, leaf=TRUE){
-    if(leaf)
-        return(V(onto)$name[is.finite(distances(onto, node, mode="out")) &
-                             degree(onto, mode="out")==0])
-    else
-        return(V(onto)$name[is.finite(distances(onto, node, mode="out"))])
+.children <- function(node, onto, leaf = TRUE) {
+    if (leaf) {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "out")) &
+            degree(onto, mode = "out") == 0])
+    } else {
+        return(V(onto)$name[is.finite(distances(onto, node, mode = "out"))])
+    }
 }
 
 # Function to compute scores of a given node in an ontology (sum of estimated
 # probabilities for the leaf nodes that are children of the given one)
-.scores <- function(pred, int_node, onto){
+.scores <- function(pred, int_node, onto) {
     c <- .children(node = int_node, onto = onto, leaf = TRUE)
     return(sum(pred[c]))
 }
 
 # Function to get prediction sets following the ontology
-.predSets <- function(lambda, pred, onto){
+.predSets <- function(lambda, pred, onto) {
     # Get the predicted class and its ancestors
     pred_class <- names(pred)[which.max(pred)]
     anc <- .ancestors(node = pred_class, onto = onto, include_self = TRUE)
 
     # Compute scores for all the ancestor of the predicted class
-    s <- sapply(as.character(anc), function(i) .scores(pred=pred, int_node=i,
-                                                       onto=onto))
+    s <- sapply(as.character(anc), function(i) {
+        .scores(
+            pred = pred, int_node = i,
+            onto = onto
+        )
+    })
     names(s) <- anc
 
-    #Sort them by score and if there are ties by distance to the predicted class
+    ## Sort them by score and if there are ties by distance to the predicted
+    ## class
     ## compute distance from predicted class
-    pos <- distances(onto, v=anc, to=pred_class, mode="out")
+    pos <- distances(onto, v = anc, to = pred_class, mode = "out")
     tie_breaker <- as.vector(t(pos))
     names(tie_breaker) <- colnames(t(pos))
-    sorted_indices <- order(s, tie_breaker, decreasing=FALSE)
+    sorted_indices <- order(s, tie_breaker, decreasing = FALSE)
     sorted_scores <- s[sorted_indices]
 
     # Select the first score that is geq than lambda
@@ -89,14 +102,12 @@
 
 
     # Add also the subgraphs we would have obtained with smaller lambda
-    selected <- c(lapply(anc[round(s, 15) < lambda], function(x)
-                    .children(node = x, onto = onto)),
-                  list(.children(sel_node, onto)))
+    selected <- c(
+        lapply(anc[round(s, 15) < lambda], function(x) {
+            .children(node = x, onto = onto)
+        }),
+        list(.children(sel_node, onto))
+    )
 
     return(Reduce(union, selected))
 }
-
-
-
-
-
diff --git a/R/utils_Resample.R b/R/utils_Resample.R
index aab41ce..f97689f 100644
--- a/R/utils_Resample.R
+++ b/R/utils_Resample.R
@@ -3,10 +3,10 @@
 # Right now it implements a two-fold strategy, dividing randomly
 # test data in two.
 
-resample.two <- function(p.cal, p.test, y.cal, labels){
-    s <- sample(1:nrow(p.test), round(nrow(p.test)/2))
-    test1 <- p.test[s,]
-    test2 <- p.test[-s,]
+resample.two <- function(p.cal, p.test, y.cal, labels) {
+    s <- sample(1:nrow(p.test), round(nrow(p.test) / 2))
+    test1 <- p.test[s, ]
+    test2 <- p.test[-s, ]
 
     # Compute predicted class
     pr.class1 <- apply(test1, 1, function(row) colnames(test1)[which.max(row)])
@@ -20,23 +20,25 @@ resample.two <- function(p.cal, p.test, y.cal, labels){
     idx1 <- idx2 <- NULL
     for (i in labels) {
         cat <- which(y.cal == i)
-        if(!is.na(des.freq1[i])){
+        if (!is.na(des.freq1[i])) {
             idx.cat1 <- sample(cat, size = des.freq1[i], replace = TRUE)
             idx1 <- c(idx1, idx.cat1)
         }
-        if(!is.na(des.freq2[i])){
+        if (!is.na(des.freq2[i])) {
             idx.cat2 <- sample(cat, size = des.freq2[i], replace = TRUE)
             idx2 <- c(idx2, idx.cat2)
         }
     }
 
-    return(list(p.cal1=p.cal[idx1,],
-                p.cal2=p.cal[idx2,],
-                p.test1=test1,
-                p.test2=test2,
-                y.cal1=y.cal[idx1],
-                y.cal2=y.cal[idx2],
-                idx=c(s, setdiff(1:nrow(p.test), s)))#index in the original data
-           )
+    return(
+        list(
+            p.cal1 = p.cal[idx1, ],
+            p.cal2 = p.cal[idx2, ],
+            p.test1 = test1,
+            p.test2 = test2,
+            y.cal1 = y.cal[idx1],
+            y.cal2 = y.cal[idx2],
+            idx = c(s, setdiff(1:nrow(p.test), s))
+        ) # index in the original data
+    )
 }
-
diff --git a/man/getPredictionSets.Rd b/man/getPredictionSets.Rd
index df15094..0bf4884 100644
--- a/man/getPredictionSets.Rd
+++ b/man/getPredictionSets.Rd
@@ -135,7 +135,7 @@ still guaranteed that
 \examples{
 # random p matrix
 set.seed(1040)
-p <- matrix(rnorm(2000*4), ncol=4)
+p <- matrix(rnorm(2000 * 4), ncol = 4)
 # Normalize the matrix p to have all numbers between 0 and 1 that sum to 1
 # by row
 p <- exp(p - apply(p, 1, max))
@@ -144,25 +144,25 @@ cell.types <- c("T (CD4+)", "T (CD8+)", "B", "NK")
 colnames(p) <- cell.types
 
 # Take 1000 rows as calibration and 1000 as test
-p.cal <- p[1:1000,]
-p.test <- p[1001:2000,]
+p.cal <- p[1:1000, ]
+p.test <- p[1001:2000, ]
 
 # Randomly create the vector of real cell types for p.cal and p.test
-y.cal <- sample(cell.types, 1000, replace=TRUE)
-y.test <- sample(cell.types, 1000, replace=TRUE)
+y.cal <- sample(cell.types, 1000, replace = TRUE)
+y.test <- sample(cell.types, 1000, replace = TRUE)
 
 # Obtain conformal prediction sets
-conf.sets <- getPredictionSets(x.query=p.test,
-                               x.cal=p.cal,
-                               y.cal=y.cal,
-                               onto=NULL,
-                               alpha=0.1,
-                               follow.ontology=FALSE,
-                               resample.cal=FALSE,
-                               labels=cell.types,
-                               return.sc=FALSE
-                               )
-
+conf.sets <- getPredictionSets(
+    x.query = p.test,
+    x.cal = p.cal,
+    y.cal = y.cal,
+    onto = NULL,
+    alpha = 0.1,
+    follow.ontology = FALSE,
+    resample.cal = FALSE,
+    labels = cell.types,
+    return.sc = FALSE
+)
 
 }
 \references{
diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd
index 08f2ead..08bed3b 100644
--- a/vignettes/vignette.Rmd
+++ b/vignettes/vignette.Rmd
@@ -17,8 +17,8 @@ vignette: >
 
 ```{r, include = FALSE}
 knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>"
+    collapse = TRUE,
+    comment = "#>"
 )
 ```
 
@@ -34,10 +34,10 @@ library(scConform)
 # library(doParallel)
 # library(igraph)
 # library(scuttle)
-# 
+#
 # `%notin%` = Negate(`%in%`)
-                                                                                
-# num_cores <- 8  
+
+# num_cores <- 8
 # cl1 <- makeCluster(num_cores)
 # registerDoParallel(cl1)
 ```