Skip to content

Commit

Permalink
Merge pull request #5 from PolyOmica/VariousFixes
Browse files Browse the repository at this point in the history
Various fixes, including fix for Issue #4
  • Loading branch information
sahirbhatnagar committed Feb 11, 2016
2 parents 1335100 + 73248c6 commit 820844a
Show file tree
Hide file tree
Showing 25 changed files with 238 additions and 110 deletions.
33 changes: 23 additions & 10 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,34 @@ Authors@R: c(person("Karim", "Oualkacha", email =
"[email protected]", role = "aut")
)
Author: Karim Oualkacha [aut, cre],
M'Hamed Lajmi Lakhal-Chaieb [aut],
Celia M.T. Greenwood [aut],
Lennart C. Karssen [aut],
Sodbo Sharapov [aut]
M'Hamed Lajmi Lakhal-Chaieb [aut],
Celia M.T. Greenwood [aut],
Lennart C. Karssen [aut],
Sodbo Sharapov [aut]
Maintainer: Karim Oualkacha <[email protected]>
Description: This is a collection of the five region-based
rare-variant genetic association tests. The following tests are
currently implemented: ASKAT, ASKAT-Normalized, VC-C1, VC-C2 and
VC-C3.
Depends: R (>= 3.2.2), foreach, doParallel
Imports: ks, CompQuadForm, Matrix, snpStats, kinship2
Suggests: GenABEL, knitr, methods
rare-variant genetic association tests. The following tests are
currently implemented: ASKAT, ASKAT-Normalized, VC-C1, VC-C2 and
VC-C3.
Depends:
R (>= 3.2.2),
foreach,
doParallel
Imports:
ks,
CompQuadForm,
data.table,
Matrix,
snpStats,
kinship2
Suggests:
GenABEL,
knitr,
methods
License: GPL (>=3)
Copyright: Celia M.T. Greenwood
LazyData: true
VignetteBuilder: knitr
NeedsCompilation: no
Packaged: 2015-12-04 01:53:06 UTC; karimoualkacha
RoxygenNote: 5.0.1
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import(foreach)
import(snpStats)
importFrom(CompQuadForm,davies)
importFrom(Matrix,nearPD)
importFrom(data.table,fread)
importFrom(kinship2,kindepth)
importFrom(ks,hns)
importFrom(ks,kdde)
4 changes: 2 additions & 2 deletions R/ASKAT.region.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ ASKAT.region <- function(y=NULL,
startpos=startpos,
endpos=endpos)

if (ncol(haplotypes) == 0) {
if (is.null(haplotypes) || ncol(haplotypes) == 0) {
warning("No genotypes available in the region from ",
startpos, " to ", endpos, " on chromosome ", chr)
result.df <- data.frame(Score.Test=NA,
Expand Down Expand Up @@ -101,7 +101,7 @@ ASKAT.region <- function(y=NULL,

result.df <- data.frame(Score.Test=pval$score,
P.value=pval$p.value,
N.Markers=ncol(haplotypes))
N.Markers=ncol(G))

if (!is.null(regionname)) {
rownames(result.df) <- regionname
Expand Down
13 changes: 8 additions & 5 deletions R/NormalizedASKAT.region.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
##' Runs the normalized ASKAT method on a given genomic region. Rank-based normalization is applied
##' to the phenotype residauls under the null model, after adjusting for covariate effects
##' Runs the normalized ASKAT method on a given genomic region.
##'
##' Rank-based normalization is applied to the phenotype residuals
##' under the null model, after adjusting for covariate effects.
##'
##' @title Run the normalized ASKAT method on a genomic region defined
##' by a start and a stop base pair coordinate
Expand All @@ -17,7 +19,8 @@
##' \item \code{regionname}: Name of the region/gene on which you
##' are running the association test
##' }
##' @author Lennart C. Karssen, Sodbo Sharapov
##' @author Lennart C. Karssen
##' @author Sodbo Sharapov
##' @export
NormalizedASKAT.region <- function(y=NULL,
X=NULL,
Expand Down Expand Up @@ -56,7 +59,7 @@ NormalizedASKAT.region <- function(y=NULL,
startpos=startpos,
endpos=endpos)

if (ncol(haplotypes) == 0) {
if (is.null(haplotypes) || ncol(haplotypes) == 0) {
warning("No genotypes available in the region from ",
startpos, " to ", endpos, " on chromosome ", chr)
result.df <- data.frame(Score.Test=NA,
Expand Down Expand Up @@ -85,7 +88,7 @@ NormalizedASKAT.region <- function(y=NULL,

result.df <- data.frame(Score.Test=pval$score,
P.value=pval$p.value,
N.Markers=ncol(haplotypes))
N.Markers=ncol(G))

if (!is.null(regionname)) {
rownames(result.df) <- regionname
Expand Down
7 changes: 4 additions & 3 deletions R/VCC1.region.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
##' \item \code{regionname}: Name of the region/gene on which you
##' are running the association test
##' }
##' @author Sodbo Sharapov, Lennart C. Karssen
##' @author Sodbo Sharapov
##' @author Lennart C. Karssen
##' @export
VCC1.region <- function(y=NULL,
X=NULL,
Expand Down Expand Up @@ -57,7 +58,7 @@ VCC1.region <- function(y=NULL,
startpos=startpos,
endpos=endpos)

if (ncol(haplotypes) == 0) {
if (is.null(haplotypes) || ncol(haplotypes) == 0) {
warning("No genotypes available in the region from ",
startpos, " to ", endpos, " on chromosome ", chr)
result.df <- data.frame(Score.Test=NA,
Expand Down Expand Up @@ -85,7 +86,7 @@ VCC1.region <- function(y=NULL,

result.df <- data.frame(Score.Test=pval$score,
P.value=pval$p.value,
N.Markers=ncol(haplotypes))
N.Markers=ncol(G))

if (!is.null(regionname)) {
rownames(result.df) <- regionname
Expand Down
6 changes: 3 additions & 3 deletions R/VCC2.region.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##' Runs the VC-C2 method on a given genomic region
##'
##' @title Run the VC-C2 method on a genomic region defined by a start
##' and a stop base pair coordinate
##' and a stop base pair coordinate
##' @inheritParams read.haplo
##' @inheritParams VCC1.region
##' @param Nperm Integer, number of permutations to use for empirical
Expand Down Expand Up @@ -60,7 +60,7 @@ VCC2.region <- function(y=NULL,
startpos=startpos,
endpos=endpos)

if (ncol(haplotypes) == 0) {
if (is.null(haplotypes) || ncol(haplotypes) == 0) {
warning("No genotypes available in the region from ",
startpos, " to ", endpos, " on chromosome ", chr)
result.df <- data.frame(Score.Test=NA,
Expand Down Expand Up @@ -113,7 +113,7 @@ VCC2.region <- function(y=NULL,

result.df <- data.frame(Score.Test=pval$score,
P.value=pval$p.value,
N.Markers=ncol(haplotypes))
N.Markers=ncol(G))

if (!is.null(regionname)) {
rownames(result.df) <- regionname
Expand Down
6 changes: 3 additions & 3 deletions R/VCC3.region.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##' Runs the VC-C3 method on a given genomic region
##'
##' @title Run the VC-C3 method on a genomic region defined by a start
##' and a stop base pair coordinate
##' and a stop base pair coordinate
##' @inheritParams VCC2.region
##' @return A data frame containing the results of the association
##' test. The data frame contains the following columns:
Expand Down Expand Up @@ -53,7 +53,7 @@ VCC3.region <- function(y=NULL,
startpos=startpos,
endpos=endpos)

if (ncol(haplotypes) == 0) {
if (is.null(haplotypes) || ncol(haplotypes) == 0) {
warning("No genotypes available in the region from ",
startpos, " to ", endpos, " on chromosome ", chr)
result.df <- data.frame(Score.Test=NA,
Expand Down Expand Up @@ -106,7 +106,7 @@ VCC3.region <- function(y=NULL,

result.df <- data.frame(Score.Test=pval$score,
P.value=pval$p.value.VCC3,
N.Markers=ncol(haplotypes))
N.Markers=ncol(G))

if (!is.null(regionname)) {
rownames(result.df) <- regionname
Expand Down
8 changes: 8 additions & 0 deletions R/ff.hap.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
ff.hap <- function(k){
if (k==1) {
x <- 6 + c(1, 2)
} else {
x <- 6 + c((2*k - 1), (2*k))
}
return(x)
}
24 changes: 24 additions & 0 deletions R/lettersTo12.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
lettersTo12 <- function(x){
x1 <- c(x[, 1], x[, 2])
Letters.SNP <- unique(x1)

if (length(Letters.SNP) == 1) {
x[, 1] <- x[, 2] <- 2
}

if (length(Letters.SNP) > 1) {
if ( ( length(which(x1 == Letters.SNP[1])) / length(x1) ) > 0.5 ) {
x[which(x[, 1] == Letters.SNP[1]), 1] <- 2
x[which(x[, 2] == Letters.SNP[1]), 2] <- 2
x[which(x[, 1] == Letters.SNP[2]), 1] <- 1
x[which(x[, 2] == Letters.SNP[2]), 2] <- 1
} else {
x[which(x[, 1] == Letters.SNP[2]), 1] <- 2
x[which(x[, 2] == Letters.SNP[2]), 2] <- 2
x[which(x[, 1] == Letters.SNP[1]), 1] <- 1
x[which(x[, 2] == Letters.SNP[1]), 2] <- 1
}
}

return(x)
}
4 changes: 2 additions & 2 deletions R/pvalue.ASKAT.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
##' Compute p-value and score for the ASKAT method
##'
##' @param RH0 a vector of length 2 which the results (output) of the
##' \code{\link{Estim.H0.ASKAT}} function (i.e. variance components
##' estimates under the null model)
##' \code{\link{Estim.H0.ASKAT}} function (i.e. variance
##' components estimates under the null model)
##' @inheritParams RVPedigree
##' @param G matrix of genotypes
##' @return A list with score and p-value for the ASKAT test on the
Expand Down
24 changes: 13 additions & 11 deletions R/read.haplo.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,26 @@
#' files or ShapeIt output files
#'
#' @param type character, \code{'ped'}, \code{'bed'} (default) or
#' \code{'shapeit-haps'} format of input file containing haplotype
#' data
#' @param filename character, path to input file containing haplotype data
#' \code{'shapeit-haps'} format of input file containing haplotype
#' data
#' @param filename character, path to input file containing haplotype
#' data
#' @param map object, data.frame contains 3 columns: rsID, chromosome,
#' position in bp as output by e.g. \code{\link{readMapFile}}.
#' @param chr character, chromosome number (basically from 1 to 22 as used by
#' \href{http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped}{Plink}),
#' on which the region of interest is located
#' position in bp as output by e.g. \code{\link{readMapFile}}.
#' @param chr character, chromosome number (basically from 1 to 22 as
#' used by
#' \href{http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped}{Plink}),
#' on which the region of interest is located
#' @param startpos numeric, start position (in bp, base pairs) of the
#' region of interest (default: 0)
#' @param endpos numeric, end position (in bp, base pairs) of the
#' region of interest (default: 0)
#' @return matrix object containing the haplotypes selected by the
#' region of interest
#' region of interest
#' @seealso \code{\link{read.haplo.pedfile}},
#' \code{\link{read.haplo.bedfile}},
#' \code{\link{read.haplo.shapeit_haps}},
#' \code{\link{readMapFile}}
#' \code{\link{read.haplo.bedfile}},
#' \code{\link{read.haplo.shapeit_haps}},
#' \code{\link{readMapFile}}
#' @keywords internal
read.haplo <- function(type="bed",
filename,
Expand Down
21 changes: 15 additions & 6 deletions R/read.haplo.bedfile.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
#' (regular PLINK binary files)
#'
#' @param filename character, path to BED file containing haplotype
#' data. Attention! filename should contain path to \code{<file>.bed}
#' with full file name. For example \code{../mydata/inputplink.bed}.
#' data. Attention! filename should contain path to
#' \code{<file>.bed} with full file name. For example
#' \code{../mydata/inputplink.bed}.
#' @inheritParams read.haplo
#' @return matrix object containing the haplotypes selected by the
#' region of interest
#' region of interest, or \code{NULL} if there are no variants in
#' the region
#' @seealso \code{\link{read.haplo}},
#' \code{\link{read.haplo.pedfile}},
#' \code{\link{read.haplo.shapeit_haps}},
#' \code{\link{readMapFile}}
#' \code{\link{read.haplo.pedfile}},
#' \code{\link{read.haplo.shapeit_haps}},
#' \code{\link{readMapFile}}
#' @import snpStats
#' @keywords internal
read.haplo.bedfile <- function(filename = "NULL",
Expand All @@ -24,6 +26,13 @@ read.haplo.bedfile <- function(filename = "NULL",
map[, 3] < endpos),
2]

if ( length(snps2out) < 1 ) {
warning("No genotypes available in the region from ",
startpos, " to ", endpos, " on chromosome ",
chr, " (filename ", filename, ")")
return(NULL)
}

basefile <- sub(".bed$", "", filename)
plink.input <- snpStats::read.plink(bed = paste0(basefile, ".bed"),
bim = paste0(basefile, ".bim"),
Expand Down
73 changes: 51 additions & 22 deletions R/read.haplo.pedfile.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,65 @@
#' (regular PLINK untransposed text files)
#'
#' @param filename character, path to PED file containing haplotype
#' data. Attention! filename should contain path to \code{<file>.ped}
#' with full file name. For example \code{../mydata/inputplink.ped}
#' data. Attention! filename should contain path to
#' \code{<file>.ped} with full file name. For example
#' \code{../mydata/inputplink.ped}
#' @param recode character, designates if data are in 1/2 format or in
#' letters (A, C, G, T) format. The defaut is 1/2 where 1
#' designating the minor allele
#' @inheritParams read.haplo
#' @return matrix object containing the haplotypes selected by the
#' region of interest
#' region of interest, or \code{NULL} if there are no variants in
#' the region
#' @seealso \code{\link{read.haplo}},
#' \code{\link{read.haplo.bedfile}},
#' \code{\link{read.haplo.shapeit_haps}},
#' \code{\link{readMapFile}}
#' @import snpStats
#' \code{\link{read.haplo.bedfile}},
#' \code{\link{read.haplo.shapeit_haps}},
#' \code{\link{readMapFile}}
#' @author Lennart C. Karssen
#' @author Sodbo Sharapov
#' @author Karim Oualkacha
#' @importFrom data.table fread
#' @keywords internal
read.haplo.pedfile <- function(filename = "NULL",
map,
chr = "NULL",
startpos = "NULL",
endpos = "NULL"){
# TODO: probably we want to automatically append ".ped" to filename
# TODO: add extensions
snps2out <- map[which(map[, 1] == chr &
map[, 3] > startpos &
map[, 3] < endpos),
2]
endpos = "NULL",
recode = "recodeLetters"){

plink.input <- snpStats::read.pedfile(file = filename,
snps = snps2out)
snps2out <- as.numeric(rownames(map)[which(map[, 1] == chr &
map[, 3] > startpos &
map[, 3] < endpos)])

GenotypeMatrix <- methods::as(plink.input$genotypes, "numeric")
# TODO: added 'rownames(GenotypeMatrix) <- idNames' may required
return(GenotypeMatrix)
}
if ( length(snps2out) < 1 ) {
warning("No genotypes available in the region from ",
startpos, " to ", endpos, " on chromosome ",
chr, " (filename ", filename, ")")
return(NULL)
}

cols2out = as.vector(sapply(snps2out, ff.hap))

# tmp = read.haplo.pedfile(filename = "data.ped",map = tmp1,chr = 1,
# startpos = 9000, endpos = 25000)
switch(recode,
recode12={
haplotypes = as.data.frame(fread(filename,
select = cols2out,
colClasses = 'character'))
},
recodeLetters={
haplotypes = as.data.frame(fread(filename,
select = cols2out,
colClasses = 'character'))
list.haplo = lapply(seq_len(ncol(haplotypes)/2), function(i){
haplotypes[, c(((2*i) - 1):(2*i))]
}
)
haplotypes = lapply(list.haplo, lettersTo12)
haplotypes = matrix(as.numeric(unlist(haplotypes)),
nrow = dim(haplotypes[[1]])[1],
byrow = FALSE)
}
)

return(haplotypes)
}
Loading

0 comments on commit 820844a

Please sign in to comment.