From d395259fc0f975ebc517efe6a5d3dd0d0be8c2c0 Mon Sep 17 00:00:00 2001
From: DanielaCorbetta <corbettadaniela@gmail.com>
Date: Fri, 10 May 2024 16:54:44 -0400
Subject: [PATCH] solved bug order resample

---
 .DS_Store                | Bin 8196 -> 8196 bytes
 DESCRIPTION              |   2 +-
 R/getPredictionSets.R    |  92 ++++++++++++++++++++-------------------
 R/resample.R             |  31 ++++++-------
 man/getPredictionSets.Rd |  10 ++---
 vignettes/vignette.Rmd   |   5 ++-
 6 files changed, 73 insertions(+), 67 deletions(-)

diff --git a/.DS_Store b/.DS_Store
index 5f982103988e9287733b36da9c016d8eb8c06c7e..e63351aa9e30c4552d9e21d6fcdb138aa769531d 100644
GIT binary patch
delta 21
ccmZp1XmQxENtnaT%veXk)X;eIE@3Yo08CN_U;qFB

delta 16
XcmZp1XmQxENqDlJp!?>Z!rVLnH>L&c

diff --git a/DESCRIPTION b/DESCRIPTION
index 6f504cf..b10ea61 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,4 +1,4 @@
-Package: ConfCell
+Package: scConform
 Type: Package
 Title: Conformal Inference For Cell Type Annotation
 Version: 0.99.0
diff --git a/R/getPredictionSets.R b/R/getPredictionSets.R
index f5e6848..6a9c49f 100644
--- a/R/getPredictionSets.R
+++ b/R/getPredictionSets.R
@@ -22,11 +22,11 @@
 #' @param onto the considered section of the cell ontology as an igraph object.
 #' @param alpha a number between 0 and 1 that indicates the allowed miscoverage
 #' @param lambdas a vector of possible lambda values to be considered. Necessary
-#' only when \code{follow_ontology=TRUE}.
-#' @param follow_ontology If \code{TRUE}, then the function returns hierarchical
+#' only when \code{follow.ontology=TRUE}.
+#' @param follow.ontology If \code{TRUE}, then the function returns hierarchical
 #' prediction sets that follow the cell ontology structure. If \code{FALSE}, it
 #' returns classical conformal prediction sets.
-#' @param resample_cal Should the calibration dataset be resampled according to
+#' @param resample.cal Should the calibration dataset be resampled according to
 #' the estimated relative frequencies of cell types in the query data?
 #' @param labels labels of different considered cell types. Necessary if
 #' \code{onto=NULL}, otherwise they are set to the leaf nodes of the provided
@@ -59,14 +59,14 @@
 
 getPredictionSets <- function(x.query, x.cal, y.cal, onto=NULL, alpha = 0.1,
                               lambdas = seq(0.001,0.999,length.out=100),
-                              follow_ontology=TRUE,
-                              resample_cal=FALSE,
+                              follow.ontology=TRUE,
+                              resample.cal=FALSE,
                               labels=NULL,
                               return.sc=NULL, pr.name="pred.set"){
-    if(follow_ontology & is.null(onto)){
+    if(follow.ontology & is.null(onto)){
         stop("An ontology is required for hierarchical prediction set.
              Please provide one or ask for conformal prediction set
-             (follow_ontology=FALSE)")
+             (follow.ontology=FALSE)")
     }
     if(is.null(onto) & is.null(labels)){
         stop("Please provide cell labels with the labels parameter")
@@ -87,7 +87,7 @@ getPredictionSets <- function(x.query, x.cal, y.cal, onto=NULL, alpha = 0.1,
               SingleCellExperiment or a matrix")
 
     # Retrieve labels from the ontology (need to add retrieval from y.cal/data
-    # when follow_ontology=FALSE)
+    # when follow.ontology=FALSE)
     if(is.null(labels))
         labels <- V(onto)$name[degree(onto, mode="out")==0]
     K <- length(labels)
@@ -98,23 +98,23 @@ getPredictionSets <- function(x.query, x.cal, y.cal, onto=NULL, alpha = 0.1,
         p.query <- matrix(NA, nrow=n.query, ncol=K)
         colnames(p.query) <- labels
         for(i in labels){
-          p.query[,i] <- colData(x.query)[[i]]
+            p.query[,i] <- colData(x.query)[[i]]
         }
     }
     else p.query <- x.query
 
     if(!is.matrix(x.cal)){
-      n.cal <- ncol(x.cal)
-      p.cal <- matrix(NA, nrow=n.cal, ncol=K)
-      colnames(p.cal) <- labels
-      for(i in labels){
-        p.cal[,i] <- colData(x.cal)[[i]]
-      }
+        n.cal <- ncol(x.cal)
+        p.cal <- matrix(NA, nrow=n.cal, ncol=K)
+        colnames(p.cal) <- labels
+        for(i in labels){
+            p.cal[,i] <- colData(x.cal)[[i]]
+        }
     }
     else p.cal <- x.cal
 
-    if(!resample_cal){
-        if (follow_ontology){
+    if(!resample.cal){
+        if (follow.ontology){
           pred.sets <- .getHierarchicalPredSets(p.cal=p.cal, p.test=p.query,
                                                 y.cal=y.cal, onto=onto,
                                                 alpha=alpha,
@@ -125,36 +125,40 @@ getPredictionSets <- function(x.query, x.cal, y.cal, onto=NULL, alpha = 0.1,
                                              y.cal=y.cal, alpha=alpha)
     }
 
-    if(resample_cal){
+    if(resample.cal){
         data <- resample.two(p.cal=p.cal, p.test=p.query, y.cal=y.cal,
                              labels=labels)
-        if (follow_ontology){
-          pred.sets1 <- .getHierarchicalPredSets(p.cal=data$p.cal1,
-                                                 p.test=data$p.test2,
-                                                 y.cal=data$y.cal1,
-                                                 onto=onto,
-                                                 alpha=alpha,
-                                                 lambdas=lambdas)$sets.test
-          pred.sets2 <- .getHierarchicalPredSets(p.cal=data$p.cal2,
-                                                 p.test=data$p.test1,
-                                                 y.cal=data$y.cal2,
-                                                 onto=onto,
-                                                 alpha=alpha,
-                                                 lambdas=lambdas)$sets.test
-          pred.sets <- c(pred.sets1, pred_sets2)
+        if (follow.ontology){
+            pred.sets1 <- .getHierarchicalPredSets(p.cal=data$p.cal2,
+                                                   p.test=data$p.test1,
+                                                   y.cal=data$y.cal2,
+                                                   onto=onto,
+                                                   alpha=alpha,
+                                                   lambdas=lambdas)$sets.test
+            pred.sets2 <- .getHierarchicalPredSets(p.cal=data$p.cal1,
+                                                   p.test=data$p.test2,
+                                                   y.cal=data$y.cal1,
+                                                   onto=onto,
+                                                   alpha=alpha,
+                                                   lambdas=lambdas)$sets.test
+            pred.sets <- c(pred.sets1, pred.sets2)
         }
-        else
-          pred.sets1 <- .getConformalPredSets(p.cal=data$p.cal1,
-                                              p.test=data$p.test2,
-                                              y.cal=data$y.cal1,
-                                              alpha=alpha)
-          pred.sets2 <- .getConformalPredSets(p.cal=data$p.cal2,
-                                              p.test=data$p.test1,
-                                              y.cal=data$y.cal2,
-                                              alpha=alpha)
-          pred.sets <- c(pred.sets1, pred_sets2)
-    } # Problem: this prediction sets are not ordered, must be ordered before
-      #          assigning them to the colData
+        else {
+            pred.sets1 <- .getConformalPredSets(p.cal=data$p.cal2,
+                                                p.test=data$p.test1,
+                                                y.cal=data$y.cal2,
+                                                alpha=alpha)
+            pred.sets2 <- .getConformalPredSets(p.cal=data$p.cal1,
+                                                p.test=data$p.test2,
+                                                y.cal=data$y.cal1,
+                                                alpha=alpha)
+            pred.sets <- c(pred.sets1, pred.sets2)
+        }
+        # Order the prediction set
+        pred.sets <- pred.sets[order(data$idx)]
+    }
+
+
 
     # if not specified, return a sc object if the input was a sc object,
     # a matrix if the input was a matrix
diff --git a/R/resample.R b/R/resample.R
index 39a2534..924c515 100644
--- a/R/resample.R
+++ b/R/resample.R
@@ -1,37 +1,38 @@
 
 resample.two <- function(p.cal, p.test, y.cal, labels){
-    s <- sample(1:nrow(test), round(nrow(test)/2))
+    s <- sample(1:nrow(p.test), round(nrow(p.test)/2))
     test1 <- p.test[s,]
     test2 <- p.test[-s,]
-    # cal_freq <- prop.table(table(y.cal))
+
     # Compute predicted class
     pr.class1 <- apply(test1, 1, function(row) colnames(test1)[which.max(row)])
     pr.class2 <- apply(test2, 1, function(row) colnames(test2)[which.max(row)])
-    test_freq1 <- prop.table(table(pr.class1))
-    test_freq2 <- prop.table(table(pr.class2))
+    test.freq1 <- prop.table(table(pr.class1))
+    test.freq2 <- prop.table(table(pr.class2))
     # Transform to absolute frequencies
-    des_freq1 <- round(test_freq1 * length(y.cal))
-    des_freq2 <- round(test_freq2 * length(y.cal))
+    des.freq1 <- round(test.freq1 * length(y.cal))
+    des.freq2 <- round(test.freq2 * length(y.cal))
 
     idx1 <- idx2 <- NULL
     for (i in labels) {
       cat <- which(y.cal == i)
-      if(!is.na(des_freq1[i])){
-        idx_cat1 <- sample(cat, size = des_freq1[i], replace = TRUE)
-        idx1 <- c(idx1, idx_cat1)
+      if(!is.na(des.freq1[i])){
+        idx.cat1 <- sample(cat, size = des.freq1[i], replace = TRUE)
+        idx1 <- c(idx1, idx.cat1)
       }
-      if(!is.na(des_freq2[i])){
-        idx_cat2 <- sample(cat, size = des_freq2[i], replace = TRUE)
-        idx2 <- c(idx2, idx_cat2)
+      if(!is.na(des.freq2[i])){
+        idx.cat2 <- sample(cat, size = des.freq2[i], replace = TRUE)
+        idx2 <- c(idx2, idx.cat2)
       }
     }
 
     return(list(p.cal1=p.cal[idx1,],
                 p.cal2=p.cal[idx2,],
-                p.test1=p.test1,
-                p.test2=p.test2,
+                p.test1=test1,
+                p.test2=test2,
                 y.cal1=y.cal[idx1],
-                y.cal2=y.cal[idx2])
+                y.cal2=y.cal[idx2],
+                idx=c(s, setdiff(1:nrow(p.test), s)))#index in the original data
            )
 }
 
diff --git a/man/getPredictionSets.Rd b/man/getPredictionSets.Rd
index c555baa..2841fa3 100644
--- a/man/getPredictionSets.Rd
+++ b/man/getPredictionSets.Rd
@@ -11,8 +11,8 @@ getPredictionSets(
   onto = NULL,
   alpha = 0.1,
   lambdas = seq(0.001, 0.999, length.out = 100),
-  follow_ontology = TRUE,
-  resample_cal = FALSE,
+  follow.ontology = TRUE,
+  resample.cal = FALSE,
   labels = NULL,
   return.sc = NULL,
   pr.name = "pred.set"
@@ -39,13 +39,13 @@ calibration data}
 \item{alpha}{a number between 0 and 1 that indicates the allowed miscoverage}
 
 \item{lambdas}{a vector of possible lambda values to be considered. Necessary
-only when \code{follow_ontology=TRUE}.}
+only when \code{follow.ontology=TRUE}.}
 
-\item{follow_ontology}{If \code{TRUE}, then the function returns hierarchical
+\item{follow.ontology}{If \code{TRUE}, then the function returns hierarchical
 prediction sets that follow the cell ontology structure. If \code{FALSE}, it
 returns classical conformal prediction sets.}
 
-\item{resample_cal}{Should the calibration dataset be resampled according to
+\item{resample.cal}{Should the calibration dataset be resampled according to
 the estimated relative frequencies of cell types in the query data?}
 
 \item{labels}{labels of different considered cell types. Necessary if
diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd
index ab9d6a3..108de0c 100644
--- a/vignettes/vignette.Rmd
+++ b/vignettes/vignette.Rmd
@@ -4,12 +4,13 @@ author:
     - name: Daniela Corbetta
       affiliation: Department of Statistical Sciences, University of Padova 
       email: daniela.corbetta@phd.unipd.it
+package: scConform
 output:
   BiocStyle::html_document:
     toc: true
     toc_float: true
 vignette: >
-  %\VignetteIndexEntry{my-vignette}
+  %\VignetteIndexEntry{scConform}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---
@@ -22,5 +23,5 @@ knitr::opts_chunk$set(
 ```
 
 ```{r setup}
-library(ConfCell)
+library(scConform)
 ```