From dc6b2068b488bd685fa917496e61074392d3adfd Mon Sep 17 00:00:00 2001 From: drosofff Date: Thu, 7 Nov 2024 19:54:09 +0100 Subject: [PATCH 1/2] update gsc_gene_expression_correlation --- tools/gsc_gene_expression_correlations/.shed.yml | 1 + .../correlation_with_signature.xml | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/gsc_gene_expression_correlations/.shed.yml b/tools/gsc_gene_expression_correlations/.shed.yml index 2ec0346c9..8efdafb2e 100644 --- a/tools/gsc_gene_expression_correlations/.shed.yml +++ b/tools/gsc_gene_expression_correlations/.shed.yml @@ -7,6 +7,7 @@ long_description: | between genes and a signature of selected genes. categories: - Transcriptomics + - Single Cell homepage_url: http://artbio.fr remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_gene_expression_correlations toolshed: diff --git a/tools/gsc_gene_expression_correlations/correlation_with_signature.xml b/tools/gsc_gene_expression_correlations/correlation_with_signature.xml index 07666d218..6b6e06cbb 100644 --- a/tools/gsc_gene_expression_correlations/correlation_with_signature.xml +++ b/tools/gsc_gene_expression_correlations/correlation_with_signature.xml @@ -1,5 +1,8 @@ - + between genes or with a signature of selected genes + + galaxy_single_cell_suite + r-optparse r-hmisc From 5151cde480ba4415700f3660a55c3dcaf5d243d1 Mon Sep 17 00:00:00 2001 From: Christophe Antoniewski Date: Thu, 7 Nov 2024 19:57:09 +0100 Subject: [PATCH 2/2] reindent R code --- .../correlation_with_signature.R | 168 +++++++++--------- 1 file changed, 86 insertions(+), 82 deletions(-) diff --git a/tools/gsc_gene_expression_correlations/correlation_with_signature.R b/tools/gsc_gene_expression_correlations/correlation_with_signature.R index 3c7e8757c..ecd510357 100644 --- a/tools/gsc_gene_expression_correlations/correlation_with_signature.R +++ b/tools/gsc_gene_expression_correlations/correlation_with_signature.R @@ -10,11 +10,12 @@ # --gene_corr_pval # --sig_corr -options(show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } +options( + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") @@ -23,75 +24,76 @@ library(Hmisc) # Arguments option_list <- list( - make_option( - "--sep", - default = "\t", - type = "character", - help = "File separator, must be the same for all input files [default : '%default' ]" - ), - make_option( - "--colnames", - default = TRUE, - type = "logical", - help = "Consider first lines as header (must stand for all input files) [default : '%default' ]" - ), - make_option( - "--expression_file", - default = NA, - type = "character", - help = "Input file that contains log2(CPM +1) expression values" - ), - make_option( - "--signatures_file", - default = NA, - type = "character", - help = "Input file that contains cell signature" - ), - make_option( - "--sig_corr", - default = "sig_corr.tsv", - type = "character", - help = "signature correlations output [default : '%default' ]" - ), - make_option( - "--gene_corr", - default = "gene_corr.tsv", - type = "character", - help = "genes-genes correlations output [default : '%default' ]" - ), - make_option( - "--gene_corr_pval", - default = "gene_corr_pval.tsv", - type = "character", - help = "genes-genes correlations pvalues output [default : '%default' ]" - ) + make_option( + "--sep", + default = "\t", + type = "character", + help = "File separator, must be the same for all input files [default : '%default' ]" + ), + make_option( + "--colnames", + default = TRUE, + type = "logical", + help = "Consider first lines as header (must stand for all input files) [default : '%default' ]" + ), + make_option( + "--expression_file", + default = NA, + type = "character", + help = "Input file that contains log2(CPM +1) expression values" + ), + make_option( + "--signatures_file", + default = NA, + type = "character", + help = "Input file that contains cell signature" + ), + make_option( + "--sig_corr", + default = "sig_corr.tsv", + type = "character", + help = "signature correlations output [default : '%default' ]" + ), + make_option( + "--gene_corr", + default = "gene_corr.tsv", + type = "character", + help = "genes-genes correlations output [default : '%default' ]" + ), + make_option( + "--gene_corr_pval", + default = "gene_corr_pval.tsv", + type = "character", + help = "genes-genes correlations pvalues output [default : '%default' ]" + ) ) opt <- parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) + args = commandArgs(trailingOnly = TRUE) +) if (opt$sep == "tab") { - opt$sep <- "\t" + opt$sep <- "\t" } if (opt$sep == "comma") { - opt$sep <- "," + opt$sep <- "," } # Open files data <- read.delim( - opt$expression_file, - header = opt$colnames, - row.names = 1, - sep = opt$sep, - check.names = FALSE + opt$expression_file, + header = opt$colnames, + row.names = 1, + sep = opt$sep, + check.names = FALSE ) signature <- read.delim( - opt$signatures_file, - header = TRUE, - stringsAsFactors = FALSE, - row.names = 1, - sep = opt$sep, - check.names = FALSE + opt$signatures_file, + header = TRUE, + stringsAsFactors = FALSE, + row.names = 1, + sep = opt$sep, + check.names = FALSE ) @@ -105,39 +107,41 @@ data <- rbind(t(signature), data) gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells # Gene correlation with signature score -gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r), - Pearson_correlation = gene_corr$r[, 1], - p_value = gene_corr$P[, 1]) +gene_signature_corr <- cbind.data.frame( + gene = colnames(gene_corr$r), + Pearson_correlation = gene_corr$r[, 1], + p_value = gene_corr$P[, 1] +) gene_signature_corr <- gene_signature_corr[order(gene_signature_corr[, 2], decreasing = TRUE), ] ### Save files ### write.table( - format(gene_signature_corr, digits = 2), - file = opt$sig_corr, - sep = "\t", - quote = FALSE, - col.names = TRUE, - row.names = FALSE + format(gene_signature_corr, digits = 2), + file = opt$sig_corr, + sep = "\t", + quote = FALSE, + col.names = TRUE, + row.names = FALSE ) r_genes <- data.frame(gene = rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output write.table( - format(r_genes[-1, -2], digits = 2), - file = opt$gene_corr, - sep = "\t", - quote = FALSE, - col.names = TRUE, - row.names = FALSE + format(r_genes[-1, -2], digits = 2), + file = opt$gene_corr, + sep = "\t", + quote = FALSE, + col.names = TRUE, + row.names = FALSE ) p_genes <- data.frame(gene = rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output write.table( - format(p_genes[-1, -2], digits = 2), - file = opt$gene_corr_pval, - sep = "\t", - quote = FALSE, - col.names = TRUE, - row.names = FALSE + format(p_genes[-1, -2], digits = 2), + file = opt$gene_corr_pval, + sep = "\t", + quote = FALSE, + col.names = TRUE, + row.names = FALSE )