Skip to content

Commit

Permalink
Merge pull request #707 from ARTbio/gsc_gene_expr_corr
Browse files Browse the repository at this point in the history
Update gsc_gene_expression_correlation
  • Loading branch information
drosofff authored Nov 7, 2024
2 parents 023776e + 5151cde commit 1573c4d
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 83 deletions.
1 change: 1 addition & 0 deletions tools/gsc_gene_expression_correlations/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ long_description: |
between genes and a signature of selected genes.
categories:
- Transcriptomics
- Single Cell
homepage_url: http://artbio.fr
remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_gene_expression_correlations
toolshed:
Expand Down
168 changes: 86 additions & 82 deletions tools/gsc_gene_expression_correlations/correlation_with_signature.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
# --gene_corr_pval <gene-gene corr pvalues file>
# --sig_corr <genes correlation to signature file>

# Error handling: hide R's default error printing and install a handler that
# writes the last error message to stderr, then quits with exit status 1
# without saving the workspace (q("no", 1, FALSE)).
# NOTE(review): the options() call appears twice below; this looks like the
# removed and added sides of the diff rendered without +/- markers - confirm
# against the repository before treating this text as runnable.
options(show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
options(
show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
)
# Request en_US.UTF-8 for the LC_MESSAGES locale category; the return value
# is stored in `loc` and not used in the lines visible here.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

Expand All @@ -23,75 +24,76 @@ library(Hmisc)

# Arguments
# Command-line options declared with make_option():
#   --sep, --colnames                    input parsing (shared by both inputs)
#   --expression_file, --signatures_file input paths (default NA)
#   --sig_corr, --gene_corr, --gene_corr_pval
#                                        output paths (default *.tsv names)
# NOTE(review): the seven make_option() entries appear twice in a row; this
# looks like the removed and added sides of a whitespace-only diff shown
# without +/- markers - confirm against the repository.
option_list <- list(
make_option(
"--sep",
default = "\t",
type = "character",
help = "File separator, must be the same for all input files [default : '%default' ]"
),
make_option(
"--colnames",
default = TRUE,
type = "logical",
help = "Consider first lines as header (must stand for all input files) [default : '%default' ]"
),
make_option(
"--expression_file",
default = NA,
type = "character",
help = "Input file that contains log2(CPM +1) expression values"
),
make_option(
"--signatures_file",
default = NA,
type = "character",
help = "Input file that contains cell signature"
),
make_option(
"--sig_corr",
default = "sig_corr.tsv",
type = "character",
help = "signature correlations output [default : '%default' ]"
),
make_option(
"--gene_corr",
default = "gene_corr.tsv",
type = "character",
help = "genes-genes correlations output [default : '%default' ]"
),
make_option(
"--gene_corr_pval",
default = "gene_corr_pval.tsv",
type = "character",
help = "genes-genes correlations pvalues output [default : '%default' ]"
)
make_option(
"--sep",
default = "\t",
type = "character",
help = "File separator, must be the same for all input files [default : '%default' ]"
),
make_option(
"--colnames",
default = TRUE,
type = "logical",
help = "Consider first lines as header (must stand for all input files) [default : '%default' ]"
),
make_option(
"--expression_file",
default = NA,
type = "character",
help = "Input file that contains log2(CPM +1) expression values"
),
make_option(
"--signatures_file",
default = NA,
type = "character",
help = "Input file that contains cell signature"
),
make_option(
"--sig_corr",
default = "sig_corr.tsv",
type = "character",
help = "signature correlations output [default : '%default' ]"
),
make_option(
"--gene_corr",
default = "gene_corr.tsv",
type = "character",
help = "genes-genes correlations output [default : '%default' ]"
),
make_option(
"--gene_corr_pval",
default = "gene_corr_pval.tsv",
type = "character",
help = "genes-genes correlations pvalues output [default : '%default' ]"
)
)

# Parse the trailing command-line arguments against option_list; the result
# is the list `opt`, accessed below as opt$<option name>.
# NOTE(review): the `args = ...` line and each if-body line appear twice
# (old and new diff sides shown together) - confirm against the repository.
opt <- parse_args(OptionParser(option_list = option_list),
args = commandArgs(trailingOnly = TRUE))
args = commandArgs(trailingOnly = TRUE)
)

# Accept the keywords "tab" and "comma" for --sep and map them to the actual
# separator characters; any other value is used as given.
if (opt$sep == "tab") {
opt$sep <- "\t"
opt$sep <- "\t"
}
if (opt$sep == "comma") {
opt$sep <- ","
opt$sep <- ","
}

# Open files
# Both tables take their row names from the first column and keep column
# names unmodified (check.names = FALSE). The expression table honours
# --colnames for its header, whereas the signature table is always read with
# header = TRUE.
# NOTE(review): each argument list appears twice (old and new diff sides
# shown together) - confirm against the repository.
data <- read.delim(
opt$expression_file,
header = opt$colnames,
row.names = 1,
sep = opt$sep,
check.names = FALSE
opt$expression_file,
header = opt$colnames,
row.names = 1,
sep = opt$sep,
check.names = FALSE
)
signature <- read.delim(
opt$signatures_file,
header = TRUE,
stringsAsFactors = FALSE,
row.names = 1,
sep = opt$sep,
check.names = FALSE
opt$signatures_file,
header = TRUE,
stringsAsFactors = FALSE,
row.names = 1,
sep = opt$sep,
check.names = FALSE
)


Expand All @@ -105,39 +107,41 @@ data <- rbind(t(signature), data)
# Pairwise Pearson correlations between the rows of `data`. The result is
# used below through its $r (correlation coefficients) and $P (p-values)
# matrices.
gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells

# Gene correlation with signature score
# Column 1 of $r and $P is taken as the signature: the hunk context shows
# `data <- rbind(t(signature), data)`, i.e. the signature is the first row of
# `data` and therefore the first variable passed to rcorr().
# NOTE(review): the three column arguments appear twice (old and new diff
# sides shown together) - confirm against the repository.
gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r),
Pearson_correlation = gene_corr$r[, 1],
p_value = gene_corr$P[, 1])
gene_signature_corr <- cbind.data.frame(
gene = colnames(gene_corr$r),
Pearson_correlation = gene_corr$r[, 1],
p_value = gene_corr$P[, 1]
)
# Sort rows by the Pearson correlation (column 2), highest first.
gene_signature_corr <- gene_signature_corr[order(gene_signature_corr[, 2], decreasing = TRUE), ]


### Save files ###
# All three outputs are written tab-separated, unquoted, with a header line
# and without row names, after format(..., digits = 2).
# NOTE(review): each write.table() argument list appears twice (old and new
# diff sides shown together) - confirm against the repository.

# 1) Correlation of every gene with the signature, sorted table built above.
write.table(
format(gene_signature_corr, digits = 2),
file = opt$sig_corr,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
format(gene_signature_corr, digits = 2),
file = opt$sig_corr,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
)

# 2) Gene-gene correlation coefficients. [-1, -2] drops the first row and the
# second column of r_genes; since column 1 is the added `gene` column, these
# are the first row and first column of gene_corr$r (presumably the
# signature's own row/column - see the rbind in the hunk context).
r_genes <- data.frame(gene = rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output
write.table(
format(r_genes[-1, -2], digits = 2),
file = opt$gene_corr,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
format(r_genes[-1, -2], digits = 2),
file = opt$gene_corr,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
)

# 3) Gene-gene correlation p-values, with the same row/column removal as for
# the coefficients.
p_genes <- data.frame(gene = rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output
write.table(
format(p_genes[-1, -2], digits = 2),
file = opt$gene_corr_pval,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
format(p_genes[-1, -2], digits = 2),
file = opt$gene_corr_pval,
sep = "\t",
quote = FALSE,
col.names = TRUE,
row.names = FALSE
)
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
<tool id="single_cell_gene_expression_correlations" name="single-cell gene expression correlations" version="4.3.1+galaxy0" profile="21.01">
<tool id="single_cell_gene_expression_correlations" name="single-cell gene expression correlations" version="4.3.1+galaxy1" profile="21.01">
<description>between genes or with a signature of selected genes</description>
<xrefs>
<xref type="bio.tools">galaxy_single_cell_suite</xref>
</xrefs>
<requirements>
<requirement type="package" version="1.7.3">r-optparse</requirement>
<requirement type="package" version="5.1_1">r-hmisc</requirement>
Expand Down

0 comments on commit 1573c4d

Please sign in to comment.