diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..8339047 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,4 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^packrat/ +^\.Rprofile$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..dbbc41d --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,14 @@ +Package: vautils +Type: Package +Title: handy little functions +Version: 0.1.0 +Author: Vasily Aushev +Maintainer: Vasily Aushev +Description: bunch of stuff + under development +License: to be defined +Encoding: UTF-8 +LazyData: true +Imports: + magrittr, + data.table diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..e709ccf --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,5 @@ +exportPattern("^[[:alpha:]]+") +exportPattern("%.*%") +export("%+%.default") +export("%+%.character") +import(data.table) diff --git a/R/data.R b/R/data.R new file mode 100644 index 0000000..c42bee6 --- /dev/null +++ b/R/data.R @@ -0,0 +1,250 @@ + +getEnvByName <- function(inpEnv=.GlobalEnv, lookFor){ + e <- inpEnv; + while (environmentName(e) != 'R_EmptyEnv' & environmentName(e)!=lookFor) e <- parent.env(e); + if (environmentName(e) != lookFor) return(NULL); + return(e); +} + +show.envs <- function(inpEnv=NULL){ + e <- inpEnv; + if (is.null(e)) e <- environment(show.envs); + # e <- .GlobalEnv; + i <- 0; + while (environmentName(e) != 'R_EmptyEnv') { + cat(i,': ', environmentName(e),'\n') + # cat(str(e, give.attr=F)) + # print(exists('%+%', envir = e)) + e <- parent.env(e); + i <- i+1; + } +} + +show.envs.plus <- function(){ + e <- environment(show.envs); + # e <- .GlobalEnv; + i <- 0; + while (environmentName(e) != 'R_EmptyEnv') { + cat(i,': ', environmentName(e),'. ') + # cat(str(e, give.attr=F)) + cat(exists('%+%', envir = e)) + cat('\n') + e <- parent.env(e); + i <- i+1; + } +} + + +# %+c%: c() +"%+c%" <- function(arg1, arg2){return(c(arg1, arg2));} +"%+C%" <- function(arg1, arg2){return(c(arg1, arg2));} +'%,%' <- function(...){ return(c(...)) } + +# %+l%: list() +"%+l%" <- function(arg1, arg2){return(list(arg1, arg2));} +"%+L%" <- function(arg1, arg2){return(list(arg1, arg2));} +'%;%' <- function(...){ return(list(...)) } + + + +"%!in%" <- function(arg1, arg2){ + return(!(arg1 %in% arg2)); +} + +# set difference +"%-%" <- function(arg1, arg2){ + return(setdiff(arg1, arg2)); +} + +# intersect +"%&%" <- function(arg1, arg2){return(intersect(arg1, arg2));} + + + +"%~%" <- function(arg1, arg2){ + if (identical(sort(arg1, na.last=T), sort(arg2, na.last=T))) {return(TRUE);} + else {print(sort(arg1, na.last=T)); print(sort(arg2, na.last=T));} + return(FALSE); +} + +# %===%: alias for `identical()` #### +"%===%" <- function(x,y) { + return(identical(x,y)); +} + +# %==%: compares with (NA==NA) = TRUE; factors as character #### +"%==%" <- function(vec1, vec2){ + if (!is.vector(vec1)) vec1 <- unlist(vec1, use.names = F); + if (!is.vector(vec2)) vec2 <- unlist(vec2, use.names = F); + if (is.factor(vec1)) vec1 <- as.character(vec1); + if (is.factor(vec2)) vec2 <- as.character(vec2); + + rez <- (vec1==vec2); + rez[is.na(vec1) & is.na(vec2)] <- TRUE; + rez[is.na(rez)] <- FALSE; + rez; +} # e. %==%: + + + +# %inw% - returns positions of any of needles in hay #### +# ex.: c(2,2,3) %inw% c(1,1,2,2,3,3,4,4) +# will return: 3 4 5 6 +"%inw%" <- function(needles, hay) { + return(which(hay %in% needles)); +} + + +# geometric mean +gm_mean = function(x, na.rm=TRUE){ + exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) +} + +sortnames <- function(input) sort(names(input)) + +toBase <- function (inpN, base, alphabet="0123456789abcdefghijklmnopqrstuvwxyz", pad=NA) { + if (length(inpN)==1 && is.na(inpN)) return(NA_character_); + strrep <- function(x, times){return(paste0(rep(x,times),collapse=""))} + if (length(alphabet)==1) alphabet <- unlist(strsplit(alphabet,split = '',fixed = T)); + if (missing(base)) {base <- length(alphabet);} + if (length(alphabet)1) return(sapply(inpN, toBase, base=base, alphabet=alphabet, pad=pad)); + rezStr <- ""; + Q <- floor(abs(inpN)); + R <- NULL; + while(TRUE) { + R <- Q %% base; + rezStr <- alphabet[R+1] %+% rezStr; + Q <- (Q-R)/base; + if (Q==0) break; + } + if (!is.na(pad) && nchar(rezStr)1) return(sapply(inpS, fromBase, alphabet=alphabet)); + + for (i in seq_len(nchar(inpS))){ + .char <- substr(inpS,i,i) + #.this <- grep(.char, alphabet); # this doesn't work!!! WTF!!! + .this <- match(.char, alphabetSS) - 1; + pos <- nchar(inpS) - i + 1; + rezN <- rezN + base^(pos-1)*.this; + #cat(i, .char, pos, .this, rezN, '\n'); + } + return(rezN); +} + +int2bitsV <- function(x) { which(as.logical(intToBits(x)), T);} + + +addBinaryFlag <- function(flagtxt, inpArrName="flagsList"){ + wEnv <- globalenv(); # environment needed to modify variable with flags list + + if (exists(inpArrName, envir=wEnv )){ # check if variable with flags list is defined + inpArr <- get(inpArrName, envir = wEnv); # then use it + } else {inpArr <- array(dim=0);} # otherwise create it as an empty array + + newval <- match(flagtxt, inpArr); # trying to find requested flag text in current list, to avoid duplication + + if (length(newval)==0 || is.na(newval)) { # if this flag text isn't in list + newval <- length(inpArr)+1; # we will add it as new member + inpArr[newval] <- flagtxt; + assign(inpArrName, inpArr, envir=wEnv); # modify variable in parent (global) variable + } + return(newval); +} + +time1 <- function() { + assign("time1before", proc.time()[3], envir = .GlobalEnv) + timestamp(); +} + +time2 <- function() { + time1after<-proc.time()[3]; + # time1before <- + (time1after-time1before); + #rm(time1before); +} + + +# Total incomprehensible magic #### +list <- structure(NA,class="result") +"[<-.result" <- function(x,...,value) { + args <- as.list(match.call()) + args <- args[-c(1:2,length(args))] + length(value) <- length(args) + for(i in seq(along=args)) { + a <- args[[i]] + if(!missing(a)) eval.parent(substitute(a <- v,list(a=a,v=value[[i]]))) + } + x +} + +# fac2num() - convert factors to numeric #### +fac2num <- function(inpdt, cols){ + for(col in cols){ + if(col %in% names(inpdt)){ + inpdt[[col]] <- as.numeric(inpdt[[col]]); + } + } + return(inpdt); +} + +# roundC(): rounds all negative to 0, all positive to integer #### +# ex.: roundC(c(-1, 0.2, 1.5, -3.8)) +# will return 0 0 2 0 +roundC <- function(x){ + ifelse(x<0, 0, round(x)); +} + +# checks if all elements are equal to one another +all.same <- function(input, na.rm=FALSE){ + if (length(input)<2) {return(TRUE);} + if (all(is.na(input))) {return(TRUE);} + return(isTRUE(all(input==input[1], na.rm = na.rm))); +} + +# non-vectorized ifelse #### +ifelse1 <- function(test, yes, no){ + if (all(test)) return(yes) else return(no); +} + +# smart seq_len #### +seqlen <- function(obj){ + if (length(obj)==1 & is.numeric(obj)) {return(seq_len(obj));} + if (is.data.frame(obj)) {return(seq_len(nrow(obj)));} + return(seq_len(length(obj))); +} + + +replace.mult <- function(inpvec, from, to){ + stopifnot(length(from)>0 & length(from)==length(to)); + for (i in seqlen(from)){ + inpvec <- replace(inpvec, inpvec==from[i], to[i]) + } + return(inpvec) +} + + +orderby <- function(x,y){ + if (is.character(y) && length(y)==1 && (y %in% names(x))) { + ret.ord <- x[[y]]; + } else ret.ord <- y; + if ('data.table' %in% class(x)) { + return(x[order(ret.ord),]) + } else return(x[order(ret.ord)]) + +} + +na.allow <- function(input) return(is.na(input) | input); diff --git a/R/datatable.R b/R/datatable.R new file mode 100644 index 0000000..28b2b0f --- /dev/null +++ b/R/datatable.R @@ -0,0 +1,816 @@ + +load_DT <- function(dtVar, fnVar, fnDef, refresh=T, colsIncl, colsExcl=NULL, ...) { + varnameDT <- deparse(substitute(dtVar)); + varnameFN <- deparse(substitute(fnVar)); + newfn <- ""; + if (exists(varnameDT) & refresh!=T) { + cat("DT (", varnameDT, ") already exists!\n"); + dtOut <- copy(dtVar); + } else { + if (refresh==T) {cat("Existing DT (", varnameDT, ") ignored! ");} else {cat("DT (", varnameDT, ") does not exist! ");} + if (exists(varnameFN)) { + cat("filename variable (", varnameFN,") is defined! \n"); + newfn <- fnVar; + } else { + cat("filename (", varnameFN,") was not defined before! \n"); + if (!missing(fnDef)) {newfn <- fnDef;} else {cat("Default filename not set!\n")}; + } + } + dtOut <- loadDT(fnInput = newfn, colsExcl=colsExcl, ...); + return(dtOut); +} # e. load_DT + +loadDT <- function(fnInput, sep="\t", header=T, refresh=T, colsIncl, colsExcl=NULL, force_chr=NULL, force_num=NULL, ...) { + while (!file.exists(fnInput)) { + ans <- ask("File not found: ", fnInput, ".\n Enter another file name, or press Enter to try again, or ESC to exit: "); + if (ans!="") {fnInput <- ans;} + } # end while + + # if (!is.null(force_chr)) + + cat(fnInput, "exists. Loading... "); + dtOut <- fread(input = fnInput, sep=sep, header=header, ...); + cat(nrow(dtOut), " records loaded.\n"); + + if (!missing(colsIncl)) { + dtOut <- dtOut[,colsIncl, with=FALSE]; + } + + if (!missing(colsExcl)) { + outnames <- setdiff(names(dtOut), colsExcl); + cat('outnames: ', outnames, '\n') + dtOut <- dtOut[,outnames, with=FALSE]; + } + + invisible(dtOut); +} + + +save_DT <- function(dtIn, fnSaveTo=NULL, quote=F, sep="\t", header=T, row.names=F, ...) { + if (is.null(fnSaveTo)) { + fnSaveTo <- deparse(substitute(dtIn)); + if (sep=='\t') fnSaveTo <- paste0(fnSaveTo, '.tsv'); + if (sep==',') fnSaveTo <- paste0(fnSaveTo, '.csv'); + } + cat("Saving to:", fnSaveTo, "... "); + if (!is.data.frame(dtIn)) {dtIn <- as.data.frame(dtIn);} + write.table(dtIn, file=fnSaveTo, quote=quote, sep=sep, row.names=row.names, col.names=header, ...); + cat(nrow(dtIn), "records saved."); +} + +inexcel <- function(dtIn, ...){ + fn_save <- tempfile(pattern=paste0(deparse(substitute(dtIn)),"_"), fileext = '.xls'); + save_DT(dtIn, fnSaveTo = fn_save, ...) + system(command = paste0('cmd /C ', fn_save)); + return(fn_save); +} + + +# tdt() - transpose a data.table #### +tdt <- function(inpdt, newcolname=names(inpdt)[1]){ + transposed <- t(inpdt[,-1,with=F]); + colnames(transposed) <- inpdt[[1]]; + transposed <- data.table(transposed, keep.rownames=T); + setnames(transposed, 1, newcolname); + return(transposed); +} + +# returns a vector of values where TRUE means this column is empty #### +emptycolumns <- function(input.table){ + return( + sapply(1:ncol(input.table), + function(X){ + all(is.na(input.table[, X])) + } + ) + ); +} + +dterr <- function (dtIn, condition, errtext) { + eval.parent(substitute({ # header of magic + if (!is.element('errors', names(dtIn))) {dtIn[,errors:=""];} + #dtIn[eval(substitute(condition)), errors:={paste0(errors, errtext)}]; # without magic we needed this + dtIn[condition, errors:={paste0(errors, errtext)}]; + invisible(dtIn); + })); # footer of magic +} + + + + +# dtIn - data.table containing our records +# condition - expression defines which records will be flagged +# flagtxt - text of error message +adderrb <- function(dtIn, condition, flagtxt, inpArrName="flagsList"){ + newval <- addBinaryFlag(flagtxt = flagtxt, inpArrName = inpArrName); + # adding flags to the 'errors' column of our table + eval.parent(substitute({ # header of magic + if (!is.element('errors', names(dtIn))) {dtIn[,errors:=0L];} # if this column did not exist, create it, initialize with 0L + dtIn[condition, errors:=bitwOr(errors, 2^(newval-1))]; # for records which meet condition, we add new flad to existing mask + invisible(dtIn); + })); # footer of magic + +} + +flexread <- function(fnRead, sheetIndex=1, sheetName=NULL, silent=T, keyby = NA, reqFile=T, char=NULL, num=NULL, ...){ + cat('\nOpen ' %+% fnRead); + + if (!file.exists(fnRead)){ + if (reqFile) stop('... File not found!\n') + else warning('... File not found!\n'); + return(NULL); + } + + if (substrRight(fnRead,4) %in% cs('xlsx xlsm .xls')) { + cat(' with read.xlsx... '); + req('xlsx', verbose = F); + rez <- read.xlsx(fnRead, sheetIndex, sheetName, ...); + rez <- data.table(rez); + } + else if (substrRight(fnRead,8) == 'sas7bdat') { + cat(' with read.sas7bdat... '); + req('sas7bdat', verbose = F); + rez <- read.sas7bdat(fnRead); + rez <- data.table(rez); + } + else { + cat(' with fread... '); + rez <- fread(fnRead, ...); + } + if (length(rez)>0) cat('Success:', paste(dim(rez),collapse = ' x ')); + if (!is.na(keyby)) setkeyv(rez, keyby) + + if (!is.null(num)){ + for (.col in num){ + if (.col %in% names(rez) & !class(rez[[.col]])=='character'){rez[[.col]] <- as.numeric(as.character(rez[[.col]]));} + } # e. for + } # e. if + + if (!is.null(char)){ + for (.col in char){ + if (.col %in% names(rez) & !class(rez[[.col]])=='character'){ + #rez[[.col]] <- as.character(rez[[.col]]) + set(rez, j = .col, value = as.character(rez[[.col]])) + } + } # e. for + } # e. if !is.null(char) + + return(rez); +} + + + + + + + + + + + + + + + + + + + + + + +apprec <- function(dtInput, dtRecord){ + oldkeys <- key(dtInput); + #print(names(dtRecord)); + if (ncol(dtInput)>ncol(dtRecord)) { + cat("Main table has more columns (", ncol(dtInput), ") than table being added (", ncol(dtRecord), "). Adding missing colums: \n"); + for (eachcol in names(dtInput)) { + if (is.element(eachcol, names(dtRecord))) next; + thisclass <- class(dtInput[[eachcol]]); + cat(eachcol, ": ", thisclass, "\n"); + dtRecord[,eachcol:=as(NA, thisclass), with=F]; + } # end for + #print(names(dtRecord)); + } # end if > + dtInput <- rbind(dtInput, dtRecord, use.names=T); + setkeyv(dtInput, oldkeys); + invisible(dtInput); +} + +duplicated3 <- function(dtIn, bykey) { # + if (nrow(dtIn)==0) return(integer(0)); + dtOut <- data.table(dtIn); # instead of copy(). Maybe not needed... + if (!missing(bykey)) { # if key is passed, we re-key the table + dtOut[, tmp.id.original:=seq_len(nrow(dtOut))] # to save original order + setkeyv(dtOut, bykey); + } + dups1 <- duplicated(dtOut); + if (is.null(key(dtOut))){ # if table is not keyed + dups2 <- duplicated(dtOut, fromLast = T); + dups <- dups1 | rev(dups2); + } else { # if table is keyed + dups2 <- c(dups1[-1L], FALSE); + dups <- dups1 | dups2; + if (!missing(bykey)) {dups <- dups[order(dtOut$tmp.id.original)]} + } + return(dups); +} + +# returns all column names except those set as keys +notkeys <- function(dtIn, stringnames=T){ + if (! 'data.table' %in% class(dtIn)) stop('Not a data.table!'); + return(setdiff(names(dtIn), key(dtIn))); +} + +dtdup <- function(dtIn, bykey) { # makes new data.table with only duplicated records from input table + if (nrow(dtIn)==0) return(dtIn); + dtOut <- data.table(dtIn); # instead of copy() + if (!missing(bykey)) {setkeyv(dtOut, bykey);} + if (is.null(key(dtOut))){ + dups1 <- duplicated(dtOut); + dups2 <- duplicated(dtOut[nrow(dtOut):1]); + dups <- dups1 | rev(dups2); + } else { + dups1 <- duplicated(dtOut); + dups2 <- c(dups1[-1L], FALSE); + dups <- dups1 | dups2; + } + dtOut <- dtOut[dups==TRUE]; + return(dtOut); +} + +cmp2fldsbynum <- function(dtIn, f1n, f2n, verbose=F){ # compares 2 columns, returns vector (F means equal, T means different) + if (length(f1n)>1) stop('cmp2dupfldsbynum: one argument expected in f1n, ', length(f1n), ' received: ', f1n); + if (length(f2n)>1) stop('cmp2dupfldsbynum: one argument expected in f2n, ', length(f2n), ' received: ', f2n); + if (!is.numeric(f1n) | !is.numeric(f2n)) stop('Column indices must be numeric!'); + if (f1n > ncol(dtIn) | f2n > ncol(dtIn)) {warning('Table has less columns than requested number!'); return(F);} + if (f1n==f2n) {warning('Identical indices, nothing to compare!'); return(F);} + + ftitle <- paste0(f1n, " (", names(dtIn)[f1n], ") vs ", f2n, " (", names(dtIn)[f2n], ")"); + if (verbose) cat(ftitle); + values1 <- dtIn[[f1n]]; + values2 <- dtIn[[f2n]]; + diff <- F; + + if (is.factor(values1)) {values1 <- as.character.factor(values1);} + if (is.factor(values2)) {values2 <- as.character.factor(values2);} + + if (class(values1)=='Date') {values1 <- as.character(values1);} + if (class(values2)=='Date') {values2 <- as.character(values2);} + + diff <- (values1 != values2); + diff[is.na(diff)] <- T + + invisible(diff); +} + + +cmp2flds.bak <- function(dtIn, f1, f2, verbose=F){ # compares 2 fields + if (length(f1)!=1) stop('cmp2dupflds: one value expected in f1, ', length(f1), ' received: ', f1); + if (length(f2)!=1) stop('cmp2dupflds: one value expected in f2, ', length(f2), ' received: ', f2); + + if (is.numeric(f1) & is.numeric(f2)) {return(cmp2dupfldsbynum(dtIn, f1, f2, tolNA));} + + ftitle <- f1 %+% " vs " %+% f2; + if (! f1 %in% names(dtIn)) {warning("No column named ", f1); return(F);} + if (! f2 %in% names(dtIn)) {warning("No column named ", f2); return(F);} + + if (f1==f2 & sum(names(dtIn)==f1)<2){ + warning('Identical column names (', f1, ') indicated, but there is only 1 column with this name!') + return(invisible(F)); + } + + f1n = which(names(dtIn)==f1) + f2n = which(names(dtIn)==f2) + + if (f1==f2){f1n = f1n[1]; f2n = f2n[2];} + + if (length(f1n)>1){warning('Multiple fields named ', f1);} + if (length(f2n)>1){warning('Multiple fields named ', f2);} + + invisible(cmp2fldsbynum(dtIn, f1n[1], f2n[1], verbose=verbose)); +} + +deldupflds <- function(dtIn, f1=names(dtIn), f2=NA, tolNA=TRUE) { # delete one of 2 fields if they are "identical" + #dtOut <- dtIn; + lf1 <- length(f1); + lf2 <- length(f2); + diff <- rep(F, nrow(dtIn)); + if (lf1==1 & lf2==1) {diff <- diff | del2dupflds(dtIn, f1, f2, tolNA);} + else if (lf1>0 & is.na(f2[1])) { + for (i in f1) {diff <- diff | del2dupflds(dtIn, i, NA, tolNA);} + } + else if (lf1>1 & lf2>1 & lf1==lf2) { + for (i in 1:lf1) { + diff <- diff | del2dupflds(dtIn, f1[i], f2[i], tolNA); + } + } + else stop('Unexpected number of arguments!'); + + + invisible(diff); +} + + +# strictly compares 2 fields +cmp2dupflds <- function(dtIn, f1, f2){ + if (length(f1)!=1) stop('cmp2dupflds: one value expected in f1, ', length(f1), ' received: ', f1); + if (length(f2)!=1) stop('cmp2dupflds: one value expected in f2, ', length(f2), ' received: ', f2); + if (identical(dtIn[[f1]],dtIn[[f2]])) return(TRUE); + return(FALSE); +} + +cleanXY <- function(dtIn, cols2check, rename=T, verbose=F){ + for (this.col in cols2check){ + f1x <- paste0(this.col,'.x'); + f1y <- paste0(this.col,'.y'); + if (f1x %!in% names(dtIn) | f1y %!in% names(dtIn)){ + if (verbose==T) {cat('Column not found! ',this.col,'\n');} + next; + } # e.if col not found + + if (cmp2dupflds(dtIn, f1x, f1y)){ + if (verbose==T) {cat('Identical: ',this.col,'\n');} + dtIn[, (f1y):=NULL]; + if (rename==T) setnames(dtIn,f1x,this.col) + } + } # e. for + invisible(dtIn) +} + + +clean2flds.bak <- function(dtIn, f1, f2=NA, rezfname=f1, guess=F, verbose=T){ + #cat(f1, 'vs', f2,'... '); + if (length(f1)!=1) stop('clean2flds.bak: one value expected in f1, ', length(f1), ' received: ', f1); + if (length(f2)!=1) stop('clean2flds.bak: one value expected in f2, ', length(f2), ' received: ', f2); + f1.old <- f1; + rezfname <- rezfname; # we need to evaluate it here! Otherwise it will evaluate too late + if (is.na(f2)) {f1 <- paste0(f1,'.x'); f2 <- paste0(f1.old,'.y'); } + + diff <- rep(F, nrow(dtIn)); + if (! f1 %in% names(dtIn)){ + if (verbose) cat(f1, 'not found!\n'); + if (guess) { + if (paste0(f1,'.x') %in% names(dtIn)) f1 <- paste0(f1,'.x'); + } else return(NULL); + } + + if (! f2 %in% names(dtIn)){ + if (verbose) cat(f2, 'not found! '); + if (guess) { + if (paste0(f2,'.y') %in% names(dtIn)) f2 <- paste0(f2,'.y'); + } else return(NULL); + } + + diff <- cmp2flds(dtIn, f1, f2, verbose=verbose); + + if (sum(diff)==0){ + cat(": equal, deleting.\n"); + dtIn <- dtIn[, (f2):=NULL] + if (rezfname!=f1) { +# if (verbose) cat(' Renaming! ', f1, rezfname) + setnames(dtIn,f1,rezfname); + } + } + +} + + +cleandupflds <- function(dtIn, f1=names(dtIn), f2=NA, scanall=T, guess2=T, verbose=T) { # delete one of 2 fields if they are "identical" + if (all(is.na(f1))) {f1 <- names(dtIn);} else {f1 <- intersect(f1, names(dtIn));} + lf1 <- length(f1); + lf2 <- length(f2); + + if (lf2==1 & is.na(f2)){ + if (scanall==T) {f2 <- (names(dtIn) %-% i);} + } + + for (i in f1) { # for each column in f1 + #if (verbose) + cat('\n',i,'... '); + values1 <- dtIn[[i]]; + if (is.factor(values1)) {values1 <- as.character.factor(values1);} + if ('Date' %in% class(values1)) {values1 <- as.character(values1);} + + f2.add <- NULL; + if (guess2==T){ + if (grepl('.x$',i)) {f2.add <- gsub('.x$','.y',i);} + } + f2.all <- c(f2, f2.add); + for (j in (f2.all %-% i)){ + if (! j %in% names(dtIn)) next; + if (verbose) cat(' vs ', j,'... ') + values2 <- dtIn[[j]]; + if (is.factor(values2)) {values2 <- as.character.factor(values2);} + if ('Date' %in% class(values2)) {values2 <- as.character(values2);} + if (identical(values1, values2)){ + cat(i,'equals',j,'. Deleting',j,'\n'); + #if (j %in% f2.add) {setnames(dtIn, i, )} + dtIn[, (j):=NULL] + f2.all <- (f2.all %-% j); + } + } # e. for j + } # e. for i + + # if (lf1==1 & lf2==1) {diff <- diff | clean2dupflds(dtIn, f1, f2, guess2=guess2, tolNA=tolNA);} + invisible(diff); +} + +reordcols <- function(dtIn, first=NULL, last=NULL) { + nfirst <- nlast <- NULL; + if (sum(duplicated(names(dtIn)))>0) warning('Duplicated names!'); + for (.col in first){ + nfirst <- c(nfirst, which(names(dtIn)==.col)) + } + + for (.col in last){ + nlast <- c(nlast, which(names(dtIn)==.col)) + } + + nrest <- which(!( names(dtIn) %in% unique(c(first,last)) )) + rez <- c(nfirst,nrest,nlast); + dtIn <- dtIn[, rez, with=F] + invisible(dtIn) +} + + + +# deluselesscol: deletes column(s) if they contain only one value (i.e. no diff between records) #### +deluselesscol <- function (dtIn, icolnames=names(dtIn), ignNA=F, silent = F, padON=F, padW=NULL, padSide='right') { + cols2del <- NULL; + if (padON==T & is.null(padW)) padW <- max(nchar(icolnames)); + for (colname in icolnames) { + values <- dtIn[[colname]]; + # (!any(is.na(values)) && all(values==values[1])) + if ( all(is.na(values)) || isTRUE(all(values==values[1], na.rm = ignNA))) { + cols2del <- c(cols2del, colname); + if (silent==F){ + col_print <- paste0("[", colname, "]"); + padded <- ifelse1(padON==F, col_print, strpad(col_print,padW+2L)) + cat(padded,"is all equal to: ", dtIn[[colname]][1],'\n'); + } + } + } # e. for + if (!is.null(cols2del)) dtIn[, (cols2del):=NULL]; + invisible(dtIn); +} # e. deluselesscol() + + +mrgcols <- function(dtInput, f_scan, csep="; ", f_ndx="id", delold=T, noNA=T, noE=T) { + #dtInput <- copy(dtInput); + if (missing(f_scan)) {f_scan <- setdiff(names(dtInput),f_ndx);} # if fields-to-scan are not defined, we scan all except index + newfield <- paste0(f_scan[1], ".tmp"); + dtInput[, (newfield):={ + rez <- NULL; + for (i in 1:ncol(.SD)) { + val <- .SD[[i]][1]; + if (!is.na(val) & val!="") {rez <- c(rez, cs(.SD[[i]][1], csep));} else + if ( is.na(val) & noNA==F) {rez <- c(rez, "NA");} else + if (val=="" & noE==F) {rez <- c(rez, "A");} + } + rez <- unique(rez); + rez <- paste0(rez, collapse=csep); + rez; + }, by=f_ndx, .SDcols=f_scan] + + if (delold==TRUE) { + dtInput[,(f_scan):=NULL]; # delete old fields + setnames(dtInput, newfield, f_scan[1]);# put old names + } + invisible(dtInput); +} # f_end mrgcols3 + +mergerows <- function(dtInput, f_ndx, f_scan, csep=";", delold=TRUE) { + dtInput <- data.table(dtInput); # copy to new table + if (missing(f_scan)) {f_scan <- setdiff(names(dtInput),f_ndx);} # if fields-to-scan are not defined, we scan all except index + newfields <- paste0(f_scan, "_S"); + cat('Index by:', f_ndx, '; merging fields:\n', f_scan, '\n'); + mrg.cnt <- 0L; + dtInput[, (newfields):={ + #print(paste0("N=", .N, "; ncol=", ncol(.SD))); + rez2 <- list(); # each member of the list will be a column + mrg.cnt <- rep(0L, ncol(.SD)); # counter for merges made + for (i in 1:ncol(.SD)) { + #print(paste0(" i: ", i)); + allvals <- ifelse(is.factor(.SD[[i]]), as.character.factor(.SD[[i]]), as.character(.SD[[i]])); + first <- allvals[1]; + #print(paste0(" first: ", first)); + if (all(is.na(allvals))) { + #print(" allNA"); + pasted <- NA_character_; + } else if (!any(is.na(allvals)) & all(allvals==first)){ + pasted <- first; + #print("all equal to first"); + } else { + #print("merging!"); + #print(.SD[[i]]); + pasted <- paste0(allvals, collapse=csep); + mrg.cnt[i] <- mrg.cnt[i] + 1L; + #print(pasted); + } + #cat("pasted:", pasted); + rez2 <- c(rez2, list(rep(pasted,.N))); + } # end for i 1..ncol + rez2; + }, by=f_ndx, .SDcols=f_scan] + if (delold==TRUE) { + #cat("old names:", names(dtInput), '\n'); + dtInput[,(f_scan):=NULL]; # delete old fields + setnames(dtInput, newfields, f_scan);# put old names + dtInput <- dtInput[!duplicated(dtInput)]; + #cat("new names:", names(dtInput), '\n'); + } + print(mrg.cnt); + invisible(dtInput); +} # f_end mergerows + + +mergerows <- function(dtInput, f_ndx, f_scan, csep=";", delold=TRUE) { + dtInput <- data.table(dtInput); # copy to new table + if (missing(f_scan)) {f_scan <- setdiff(names(dtInput),f_ndx);} # if fields-to-scan are not defined, we scan all except index + newfields <- paste0(f_scan, "_S"); + # cat('Index by:', f_ndx, '; merging fields:\n', f_scan, '\n'); + #mrg.cnt <- 0L; + dtInput[, (newfields):={ + #print(paste0("N=", .N, "; ncol=", ncol(.SD))); + rez2 <- list(); # each member of the list will be a column + #mrg.cnt <- rep(0L, ncol(.SD)); # counter for merges made + # cat("N:", .GRP, '; keyval:', .BY[[1]], '.N =', .N, '\n'); + for (i in 1:ncol(.SD)) { + # cat(" i:", i, '=',names(.SD)[i]); + if (.N<2 | all.same(.SD[[i]])) { + pasted <- .SD[[i]][1]; + pasted <- as.character(pasted); + # cat(' equal\n'); + } else { + # cat(' diff! Len:', length(.SD[[i]]),'...'); + allvals <- as.character(.SD[[i]]); + pasted <- paste0(allvals, collapse=csep); + # mrg.cnt[i] <- mrg.cnt[i] + 1L; + # cat(length(allvals), pasted,'\n'); + } # end of else + #cat("pasted:", pasted); + rez2 <- c(rez2, list(rep(pasted,.N))); + } # end for i 1..ncol + rez2; + }, by=f_ndx, .SDcols=f_scan] + if (delold==TRUE) { + #cat("old names:", names(dtInput), '\n'); + dtInput[,(f_scan):=NULL]; # delete old fields + setnames(dtInput, newfields, f_scan);# put old names + dtInput <- dtInput[!duplicated(dtInput)]; + #cat("new names:", names(dtInput), '\n'); + } + # print(mrg.cnt); + invisible(dtInput); +} # f_end mergerows + +mergebyrownames <- function(x,y){ + df <- merge(x, y, by='row.names', all=T); + row.names(df) <- df$Row.names; + df$Row.names <- NULL; + df; +} + + +compcols <- function(inptab1, inptab2){ + commoncols <- intersect(colnames(inptab1), colnames(inptab2)); + diff1 <- setdiff(colnames(inptab1), colnames(inptab2)); + diff2 <- setdiff(colnames(inptab2), colnames(inptab1)); + if (length(diff1)>0) {cat("only in table1:", diff1, "\n");} + if (length(diff2)>0) {cat("only in table2:", diff2, "\n");} + for (thiscol in commoncols){ + cat(thiscol, ": ", sep=""); + cat(identical(inptab1[, thiscol], inptab2[, thiscol]), "\n"); + } +} + +cleannames <- function(inputnames, dotLeading=T, dotTrailing=T, dotMulti=T, spaceTo=''){ + if (!is.na(spaceTo)) inputnames <- (gsub(" +",spaceTo,inputnames)); + if (dotTrailing==T) inputnames <- gsub( "\\.+$","", inputnames); # just remove trailing "." + if (dotLeading==T) inputnames <- gsub("^\\.+", "", inputnames); # just remove trailing "." + if (dotMulti==T) inputnames <- gsub( "\\.+", ".", inputnames); # change ".." to "." + + inputnames; +} + +dtcleannames <- function(dtIn, worknames=names(dtIn),...){ + setnames(dtIn, worknames, cleannames(worknames,...)); + invisible(dtIn); +} + + +# group records by fields defined in 'bys', +# then in each group leaves only records +# having minimal total amount of NAs in fields defined in 'countnames' fields +leave.most.info <- function(dtIn, bys, countnames=NULL, keep=F){ + if (is.null(countnames)) {.countnames <- !(names(dtIn) %in% bys);} + else .countnames <- (names(dtIn) %in% countnames); + not.na.count <- !is.na(dtIn[, .countnames, with=F]); + dtIn[, .tmp_info_score:=apply(not.na.count, 1, sum)] + dtIn[, '.tmp_delit':={ + delit <- F; + if (.N>1){ + delit <- (get('.tmp_info_score') < max(get('.tmp_info_score'))); + } + delit; + }, by=bys, with=F] + cat('Deletion: ', sum(dtIn$.tmp_delit==T),'\n'); + #dtIn[, ':='(my.N=.N, my.grp=.GRP), by=bys, with=F] + if (keep) return(dtIn); + dtIn <- dtIn[.tmp_delit==F,][,c('.tmp_delit', '.tmp_info_score'):=NULL] + return(dtIn) +} + +# DON'T USE IT!!! Use cSplit() instead!!! +# dt1 <- fread("V1 V2 V3 V4 +# a b;cc;d e f +# c d;e h j +# x d;e uu kk") +#dtIn=dt1; col2split='V2'; sep=';'; +#dtIn=copy(dt.fullprot); col2split='Gene names (primary )'; sep=';'; +# DON'T USE IT!!! Use cSplit() from splitstackshape instead!!! +splitcol2rows <- function(dtIn, col2split, sep, req=T){ + warning("DON'T USE IT!!! Use cSplit() from splitstackshape instead!!!"); + dtOut <- dtIn; + orinames <- c(names(dtOut)); # must make a copy of names() to avoid its update! + orikeys <- key(dtOut); + if(length(col2split)>1){ + warning('Multiple columns to split!\n'); + for (eachcol in col2split){ + dtOut <- splitcol2rows(dtOut, eachcol, sep, req) + }# end for + return(dtOut); + } # end if + if (!is.character(col2split)) error('Column name required!'); + if (!(col2split %in% names(dtOut))) error('Column not found: ', col2split); + dt.split <- dtOut[, + .(tmp.add.col=rep(unlist(strsplit(get(col2split),sep,T)), .N)), + #('new1'=.N), + by=col2split] + dt.split <- unique(dt.split, by=NULL); + setkeyv(dt.split, col2split) + setkeyv(dtOut, col2split) + dtOut <- dt.split[dtOut,]; # not dtOut[dt.split,] !!! + dtOut[, c(col2split):=NULL]; + setnames(dtOut, 'tmp.add.col', col2split); + setcolorder(dtOut, orinames); + setkeyv(dtOut, orikeys); + return(dtOut); +} + + +# read all files and merge into one data.table +mergefiles <- function(fn_files, fill=T, fn.col=NULL, FUN=fread, ...){ + final.dt <- NULL; + for (this.file in fn_files){ + this.dt <- FUN(this.file, ...); # read file with chosen function: fread() by default + if (!is.null(fn.col)) this.dt[,(fn.col):=this.file]; # add column with file name + if (!is.null(final.dt)) { + final.dt <- rbind(final.dt, this.dt, fill=fill); + } else final.dt <- this.dt; + } + return(final.dt) +} + + + + +mergefiletabs <- function(fn.inpdir, fn.mask='.*', fn.list=NULL, full.names = T, ...){ + + if (is.null(fn.list)) + fn.list <- list.files(fn.inpdir, fn.mask, include.dirs = FALSE, full.names = TRUE) + + dt.all <- NULL; + + for (fn.this in fn.list){ + fn.this.show <- ifelse(full.names==T, fn.this, basename(fn.this)) + dt.this <- flexread(fn.this, ...) + dt.this[, ffn:=fn.this.show] + dt.all <- rbind(dt.all, dt.this) + } + invisible(dt.all); +} + + + + +# loadOrBuild - if file exists, then loads the table from it, otherwise re-buils the table #### +# inDT inpuit parameter is a function call, but in fact the function is only evaluated if the file doesn't exist => saves time +loadOrBuild <- function (fnDT, inDT, ...){ + nm.load <- c(names(formals(fread)), names(formals(loadDT))); + nm.save <- c(names(formals(write.table)), names(formals(save_DT))); + dots <- list(...); + if (file.exists(fnDT)){ + cat('File found:', fnDT, '; will load it instead of building a new table.\n'); + return( + do.call('loadDT', + c( + list(fnInput = fnDT), + dots[names(dots) %in% nm.load] + ) + ) # instead of loadDT(fnDT, ...) + ); + } else { + cat('File not found:', fnDT, '; we\'ll build new table and then save it.\n'); + do.call('save_DT', + c(list(dtIn=inDT, fnSaveTo = fnDT), + dots[names(dots) %in% nm.save]) + ) # instead of save_DT(inDT, fnDT, ...); + return(inDT); + } +} + +# loadDTlazy: loads dt from file only if it is not defined yet #### +# previously known as load_DT, should be replaced everywhere +loadDTlazy <- function(dtVar, fnVar=NULL, fnDef, refresh=T, colsExcl=NULL, ...) { + varnameDT <- deparse(substitute(dtVar)); + varnameFN <- deparse(substitute(fnVar)); + newfn <- ""; + if ((exists(varnameDT) | varnameDT=='.') & refresh!=T) { + cat("DT (", varnameDT, ") already exists!\n"); + dtOut <- dtVar;# copy(dtVar) doesn't work :/ + } else { + if (refresh==T) {cat("Existing DT (", varnameDT, ") ignored! ");} else {cat("DT (", varnameDT, ") does not exist! ");} + if (exists(varnameFN)) { + cat("filename variable (", varnameFN,") is defined! \n"); + newfn <- fnVar; + } else { + cat("filename (", varnameFN,") was not defined before! \n"); + if (!missing(fnDef)) {newfn <- fnDef;} else {cat("Default filename not set!\n")}; + } + dtOut <- loadDT(fnInput = newfn, colsExcl=colsExcl, ...); + } + return(dtOut); +} + + +# setnamessp #################################### +# usual setnames() requires that all old names are present in the table, +# setnamessp() tolerates missing names +setnamessp <- function(dtIn, old, new, verbose=T){ + foundOld <- old %in% names(dtIn); + newDups <- new[foundOld] %in% names(dtIn); # if old_col is supposed to be renamed to new_col while new_col already exists in dtIn + if (sum(newDups)>0) warning('Warning! those column already existed: ', new[newDups]); + if (sum(foundOld)>0){ + setnames(dtIn, old[foundOld], new[foundOld]); + if (verbose) {cat(sum(foundOld), ' names changed:\nFrom:', old[foundOld], '\n to:', new[foundOld], '\n');} + } +} + + + +# combines all values from selected columns and returns set of unique +getlevels <- function(dtIn, cols){ + ret.levels <- NULL; + for (this.col in cols){ + ret.levels <- c(ret.levels, + ifelse1(is.factor(dtIn[[this.col]]), + levels(dtIn[[this.col]]), + unique(dtIn[[this.col]])) + ) + + } # end for + return(unique(ret.levels)); +} + +cnames <- function(dtIn) return(c(names(dtIn))); # copy names() to avoid updates + + +setfirstcol <- function(dtIn, firstcols){ + setcolorder(dtIn, c(firstcols, (names(dtIn) %-% firstcols)) ) +} + +names.comm <- function(dt1,dt2) {return(intersect(names(dt1),names(dt2)))} +names.diff <- function(dt1,dt2) {return(setdiff(names(dt1),names(dt2)))} + +findnamesrange <- function(inpDT,name1,name2, values=F){ + col1 <- which(names(inpDT)==name1) + col2 <- which(names(inpDT)==name2) + if (values==T) {return(names(inpDT)[col1:col2]);} + else return(col1:col2); +} + + + +cast.fun <- function(inp.dt, cols2cast, FUN){ + cols2castY <- intersect(cols2cast,names(inp.dt)) + if (!identical(cols2castY,cols2cast)) warning('Columns not found: ', paste0(setdiff(cols2cast,names(inp.dt)), collapse = ' '),'\n') + inp.dt[, (cols2castY) := lapply(.SD, FUN), .SDcols = cols2castY] + return(inp.dt); +} + +cast.char <- function(inp.dt, cols2cast){ + return(cast.fun(inp.dt,cols2cast,as.character)); +} + +cast.num <- function(inp.dt, cols2cast){ + return(cast.fun(inp.dt,cols2cast,as.numeric)); +} + +cast.factor <- function(inp.dt, cols2cast){ + return(cast.fun(inp.dt,cols2cast,as.factor)); +} + + +each.row <- function(inp.dt) seq_len(nrow(inp.dt)); diff --git a/R/eSet.R b/R/eSet.R new file mode 100644 index 0000000..8006eb7 --- /dev/null +++ b/R/eSet.R @@ -0,0 +1,111 @@ + +# extends phenoData of given eSet by adding a data.table +attachpData <- function(es, dat, by.dat=NULL, by.es=NULL, reqUniqESkeys=T){ + + # if no fields indicated, then add complete [dat] to pData + if (is.null(by.es) & is.null(by.dat)) { + pData(es) <- cbind(pData(es), dat); + return(invisible(es)); + } # return & exit + + if (!is.data.table(dat)) dat <- data.table(dat); + + # + if (!is.null(by.dat)){ + keyvals <- dat[[by.dat]]; + if (anyDuplicated(keyvals)) { + print(keyvals[duplicated(keyvals)]); + stop('Duplicated key values found in dat!'); + } + + dat <- data.table(dat); + setkeyv(dat, by.dat); + } + + # if key.col is defined only for dat, we use sampleNames(es), otherwise defined by.es col + es.keys <- ifelse1(is.null(by.es), sampleNames(es), pData(es)[[by.es]]) + if (anyDuplicated(es.keys) & reqUniqESkeys) { + print(es.keys[duplicated(es.keys)]) + warning('Duplicated es.keys found!'); + } + + dat.use <- dat[es.keys,]; + dat.use <- dat.use[,-c(by.dat),with=F]; # #dat.use <- dat.use[,(names(dat.use) %-% by.dat),with=F]; # + + names.dup <- colnames(dat.use) %&% varLabels(es); # colnames() instead of ... in case of matrix + if (length(names.dup)>0) { + pData(es) <- pData(es)[,(varLabels(es) %-% names.dup)] + warning('Replaced columns in phenoData: ', paste(names.dup, collapse = ', ')) + } + + pData(es) <- cbind(pData(es), dat.use); + invisible(es); +} + + + + + + + + + + + + + +attachpData0 <- function(es, dat, keycol, es.keycol=NULL){ + #if (!is.data.table(dat)) dat <- data.table(dat); + + if (missing(keycol)){ + keyvals0 <- row.names(dat); + } else { + keyvals0 <- dat[[keycol]] + setkeyv(dat,keycol); + } + + es.keys <- ifelse1(is.null(es.keycol), sampleNames(es), pData(es)[[es.keycol]]) + if (anyDuplicated(es.keys)) { + print(es.keys[duplicated(es.keys)]) + stop('Duplicated es.keys found!'); + } + + names.nof <- es.keys %-% keyvals0; + if (length(names.nof)>0) + warning('phenoData IDs not found in key: ', paste(names.nof, collapse = ', ')) + + dat.use <- dat[es.keys,]; + + if (missing(keycol)){ + keyvals <- row.names(dat.use); + } else { + keyvals <- row.names(dat.use) <- dat.use[[keycol]]; + dat.use <- dat.use[,-c(keycol),with=F]; + } + + if (anyDuplicated(keyvals)) stop('Duplicated key values found!'); + + names.dup <- colnames(dat.use) %&% varLabels(es); # colnames() instead of colnames() in case of matrix + if (length(names.dup)>0) stop('Columns already in phenoData: ', paste(names.dup, collapse = ', ')) + + pData(es) <- cbind(pData(es), dat.use); + invisible(es); +} + + +exprext <- function(es.input, cols2add=NULL){ + mtx.expr <- t(exprs(es.input)); # 608 + dat2add <- ifelse1(is.null(cols2add), pData(es.input), pData(es.input)[cols2add]) + return(data.table(cbind(dat2add,mtx.expr))) +} + + +es.rename.f <- function(es, kcol){ # rename features + featureNames(es) <- fData(es)[[kcol]] + invisible(es) +} + +es.rename.s <- function(es, kcol){ # rename samples + sampleNames(es) <- pData(es)[[kcol]] + invisible(es) +} diff --git a/R/graph.R b/R/graph.R new file mode 100644 index 0000000..66c43b0 --- /dev/null +++ b/R/graph.R @@ -0,0 +1,7 @@ +# reset graphic state +resetPar <- function() { + dev.new() + op <- par(no.readonly = TRUE) + dev.off() + op +} diff --git a/R/hello.R b/R/hello.R new file mode 100644 index 0000000..c5c4158 --- /dev/null +++ b/R/hello.R @@ -0,0 +1,18 @@ +# Hello, world! +# +# This is an example function named 'hello' +# which prints 'Hello, world!'. +# +# You can learn more about package authoring with RStudio at: +# +# http://r-pkgs.had.co.nz/ +# +# Some useful keyboard shortcuts for package authoring: +# +# Build and Reload Package: 'Ctrl + Shift + B' +# Check Package: 'Ctrl + Shift + E' +# Test Package: 'Ctrl + Shift + T' + +hello <- function() { + print("Hello, world! 2") +} diff --git a/R/loading.R b/R/loading.R new file mode 100644 index 0000000..dcec7bf --- /dev/null +++ b/R/loading.R @@ -0,0 +1,145 @@ +testX <- function(){ + print('Running! 2'); +} + + + +# from list of locations, tries to load source +loadAny <- function(...){ + arglist <- unlist(list(...)); + cat(' ', length(arglist), 'locations provided.\n') + ndx <- 0L; + for (arg in arglist){ + ndx <- ndx + 1L; + ndx_s <- paste0(' [', ndx, ']'); + cat(ndx_s, 'Trying to load ', arg, '...\n'); + + rez.src <- ''; + tryCatch( + expr = {rez.src <<- source(arg);}, + warning = function(w){rez.src <<- (paste0('Warrning: ', w$message, '\n'));}, + error = function(e){rez.src <<- (paste0('Errrrror: ', e$message, '\n'));}, + finally = {cat(rez.src);} + ); + + if (rez.src=='') {cat('Sourced successfully! (',arg,')\n'); return(T);} else + if (grepl('Warrning: package .* was built under R version .*', rez.src)) {cat('Sourced with minor warning. (',arg,')\n'); return(T);} else + cat('Sourced unsuccessfully! (',arg,')\n'); + } # e. for + + stop('All source() calls failed!') + +} # e. loadAny() + + +req <- function(packagename, verbose=T){ + if (length(packagename)>1) { # recursively process vector/list + if (verbose) cat('Requested', length(packagename), 'packages:', paste0(packagename, collapse = ','),'\n'); + ndx <- 0L; + for (eachpackagename in packagename) { + ndx <- ndx + 1L; + ndx_s <- paste0('(', ndx, ')'); + if (verbose) cat(ndx_s); + req(eachpackagename); + } + if (verbose) cat("Finished loading", length(packagename), "packages.\n"); + return(T); + } + + packagename <- unlist(strsplit(packagename, " ", fixed=T)); + if (length(packagename)>1) { + if (verbose) cat('Splitting package name:', length(packagename), "names.\n"); + req(packagename); + return(T); + } + + if (verbose) cat(" Checking package ", packagename, "... "); + if(packagename %in% rownames(installed.packages()) == TRUE) { + if (verbose) cat('already installed... ') + } else { + if (verbose) cat('Not installed! Trying to install... \n'); + install.packages(packagename); + if (verbose) cat("installation finished, checking... "); + if(packagename %in% rownames(installed.packages()) == FALSE) {warning('Still failed!\n'); return(FALSE);} + } + + if (paste0('package:',packagename) %in% search()) { + if (verbose) cat('already loaded!\n'); return(TRUE); + } + + reqrez <- TRUE; + tryrez <- tryCatch( + expr = {reqrez <<- require(packagename, character.only = T);}, + warning = function(w){ + cat('Warning: ', w$message, '\n'); + if (grepl('there is no package called', w$message)) { + reqrez <<- FALSE; + } else {reqrez <<- require(packagename, character.only = T);} # very ugly... + }, + error = function(e){ + cat('Error: ', e$message, '\n'); + reqrez <<- FALSE; + }, + finally = cat("finished with", packagename, ".\n") + ); + if (verbose) cat('\ntryrez: ', tryrez, 'reqrez: ', reqrez, '\n'); + #if (is.null(reqrez)) reqrez <- FALSE; + + if (reqrez) {if (verbose) cat("Success!\n"); return(T);} +} + +reload <- function(pkgName){ + pkgNameP <- paste0('package:',pkgName); + print(pkgNameP); + if(pkgNameP %in% search()) + { + #pkgNameP <- "package:ggplot2"; + print('package found! will be detached. v5'); + #detach(name = pkgNameP, unload=TRUE); + unloadNamespace(pkgName); + } + req(pkgName); +} + + + +# trylocs: +# for given list of files, try (one-by-one) if they exist; +# returns either first existing (with all=FALSE, default) +# or all existing (with all=TRUE) +# if none found, shows warning or error (with req=TRUE) +trylocs <- function(..., req=F, all=F){ + arglist <- unlist(list(...)); + rez <- c() + for (elem in arglist){ + if (file.exists(elem)) { + if (all==F) return(elem) else {rez <- c(rez, elem);} + } + } + if (length(rez)>0) return(rez); + warning('None of the locations found!') + if (req==T) error('None of the locations found!') + return(NULL); +} + + +getvloc <- function(){ + locs <- list(); + locs['~/../AppData/Roaming/locconfig/dell'] <- 'dell'; + locs['~/../AppData/Roaming/locconfig/T560'] <- 'T560'; + locs['~/../AppData/Roaming/locconfig/helix'] <- 'helix'; + locs['~/../AppData/Roaming/locconfig/atran'] <- 'atran'; + + for (loc in names(locs)){ + if (file.exists(loc)) {return(locs[[loc]]);} + } + return(""); +} + + +req('data.table magrittr'); + +vai <- function(){ + req('data.table magrittr'); +} + diff --git a/R/polyA.R b/R/polyA.R new file mode 100644 index 0000000..fc3e065 --- /dev/null +++ b/R/polyA.R @@ -0,0 +1,78 @@ +mergaxtmaps <- function(dir) { + #dir <- "d:/OutDocs/BigData/Genome/UCSC/hs vs pantro/"; + if (substr(dir, nchar(dir), nchar(dir))=="/") dir <- substr(dir, 1, nchar(dir)-1); # chop trailing '/' + if (!file.exists(dir)) stop("Directory not found!"); + mask <- "*net.axt.map$"; + flist <- list.files(path=dir, pattern=mask, full.names=TRUE); + if (length(flist)==0) stop("no files found!"); + dtaxtmap <- fread(flist[1], sep="\t"); + dtaxtmapFull <- dtaxtmap[0,]; # just to initialize columns + + # merge all *.axt.map files to single table dtaxtmapFull + before<-proc.time()[3]; + for (fnInpAXTmap in flist) { + if (file.exists(fnInpAXTmap)) { + dtaxtmap <- fread(fnInpAXTmap, sep="\t"); + dtaxtmapFull <- rbind(dtaxtmapFull, dtaxtmap); + } # end if file exists + } # end for + rm(fnInpAXTmap); rm(dtaxtmap); + after<-proc.time()[3]; (after-before); rm(before); rm(after); + return(dtaxtmapFull); +} + + +goverlap <- function (dtLeft, dtRight, + chrL="chr", chrR="chr", + startL="start", endL="end", + startLsh=0, endLsh=0, + startR="hex5p", endR="hex5p", + startRsh=0, endRsh=5) { + dtLeft [[chrL]] <- factor(dtLeft [[chrL]]) # factorize 'chromosome' field + dtRight[[chrR]] <- factor(dtRight[[chrR]]) # factorize 'chromosome' field + mylevels <- unique(c(levels(dtLeft[[chrL]]), levels(dtRight[[chrR]]))) # + # dtLeft$tmp.rowid <- seq_len(nrow(dtLeft)); setkey(dtLeft, 'tmp.rowid') + dtRight$tmp.rowid <- seq_len(nrow(dtRight)); setkey(dtRight, 'tmp.rowid') + if (anyNA(dtLeft [[startL]])) stop('NA in dtLeft[[', startL, ']]!'); + if (anyNA(dtLeft [[endL ]])) stop('NA in dtLeft[[', endL, ']]!'); + if (anyNA(dtRight[[startR]])) stop('NA in dtRight[[',startR, ']]!'); + if (anyNA(dtRight[[endR ]])) stop('NA in dtRight[[', endR, ']]!'); + irLeft <- IRanges(dtLeft [[startL]]+startLsh, dtLeft [[endL]]+endLsh) + irRight <- IRanges(dtRight[[startR]]+startRsh, dtRight[[endR]]+endRsh) + rleLeft <- Rle(factor(dtLeft [[chrL]], levels=mylevels)) + rleRight <- Rle(factor(dtRight[[chrR]], levels=mylevels)) + grL <- GRanges(rleLeft, irLeft) + grR <- GRanges(rleRight, irRight) + + ovlaps = findOverlaps(grR, grL) # 75687 + + dtLeftHits <- dtLeft[subjectHits(ovlaps)]; # 75687 # don't re-set key! + dtLeftHits[, rightHit := queryHits(ovlaps)] # + setkey(dtLeftHits, rightHit) + + dtRightHits <- dtLeftHits[dtRight, nomatch=NA, allow.cartesian = TRUE] # 90190 o_O # that's where we need to have key tmp.rowid! + #print(names(dtRightHits)); + if (chrL==chrR) {deldupflds(dtRightHits, chrL);} else {deldupflds(dtRightHits, chrL, chrR);} + setkey(dtRightHits, rightHit) + #print(names(dtRightHits)); + return(dtRightHits); +} ### goverlap + + + + + + + +# names_old <- cs(' ugenellid symbol gnum dist hexSeq') +# names_new <- cs('g.ugenellid g.symbol g.gnum h.dist h.hexSeq') + +#names_old <- c(cs(' llid siteid sitenum s2PDU s3PDU cleavage site1 unigene.id unigene2'), 'supporting EST', names_old) +#names_new <- c(cs('s.llid s.siteid s.sitenum s.s2PDU s.s3PDU s.cleavage s.site1 s.unigene s.unigene2'), 's.ESTs', names_new) + +#names_old <- c(cs('adjFreq hexseqOK posNewClv adjHexSeqRef adjHexSeqAlt'), names_old) +#names_new <- c(cs(' MAF hexSeqOK posClvNew hexSeqMajor hexSeqMinor '), names_new) + +#names_old <- c(cs(' adjHexSeqRef adjHexSeqAlt '), names_old) +#names_new <- c(cs(' hexSeqMajor hexSeqMinor '), names_new) + diff --git a/R/stat.R b/R/stat.R new file mode 100644 index 0000000..5387a02 --- /dev/null +++ b/R/stat.R @@ -0,0 +1,212 @@ + +se <- function(input, na.rm=T) { + if(na.rm==T) {input <- na.omit(input);} + return(sd(input)/sqrt(length(input))); +} + +perc.ranks <- function(x) trunc(rank(x))/length(x) + + +NA_get <- function(obj){ + #if (class(obj)=="numeric") return(NA_real_); + #if (class(obj)=="logical") return(NA_); +} + +percent <- function(num1, ndig=2){ + paste0(round(100*num1, digits=ndig),"%") +} + +tab <- function(input, useNA='ifany', na.rm=F, do.sort=T, ...){ + if (useNA==F | na.rm==T) useNA <- 'no'; + if (useNA==T | na.rm==F) useNA <- 'ifany'; + + name1 <- deparse(substitute(input)); + df <- data.frame(table(input, useNA = useNA, ...)); + df <- df[df$Freq!=0,] + sum1 <- sum(df$Freq); + df$FreqP <- percent(df$Freq/sum1); + print(names(df)); + df <- data.table(df); + setnames(df, 'input', name1); # names(df)[names(df)=='input'] <- name1; + print(order(-df$Freq)); + df <- df[order(-df$Freq)]; + print(names(df)); + return(df); +} + +tabDF <- function(input, useNA='ifany', na.rm=F, do.sort=T, ...){ + if (useNA==F | na.rm==T) useNA <- 'no'; + if (useNA==T | na.rm==F) useNA <- 'ifany'; + + name1 <- deparse(substitute(input)); + df1 <- data.frame(table(input, useNA = useNA, ...)); + df1 <- df1[df1$Freq!=0,] + sum1 <- sum(df1$Freq); + df1$FreqP <- percent(df1$Freq/sum1); +# print(names(df)); + names(df1)[names(df1)=='input'] <- name1; +# print(order(-df$Freq)); + if (do.sort) df1 <- df1[order(-df1$Freq),]; +# print(names(df)); + return(df1); +} + +tabDT <- function(input, useNA='ifany', do.sort=T, ...){ + if (useNA==F) useNA <- 'no'; + name1 <- deparse(substitute(input)); + dt1 <- data.table(table(input, useNA = useNA, ...)); + setnames(dt1, 'N', 'Freq') + print(names(dt1)); + print(is.data.table(dt1)) + dt1 <- dt1[Freq!=0,] + sum1 <- sum(dt1$Freq); + dt1[, FreqP:=percent(Freq/sum1)]; + setnames(dt1, 'input', name1); +# print(order(-df$Freq)); +# print(names(dt)); + if (do.sort) dt1 <- dt1[order(Freq),]; +# print(names(dt1)); + return(dt1); +} + +tab <- tabDF; + + +# test1 <- function(input){ +# name1 <- deparse(substitute(input)); +# dt1 <- data.table(table(input, useNA = "ifany")); +# setnames(dt1, 'N', 'Freq') +# print(names(dt1)); +# print(is.data.table(dt1)) +# dt1 <- dt1[Freq!=0,] +# } +# +# test2 <- function(inpdt){ +# dt1 <- copy(inpdt) +# setnames(dt1, 'N', 'Freq') +# print(names(dt1)); +# print(is.data.table(dt1)) +# dt1 <- dt1[Freq!=0,] +# } +# +# test2a <- function(inpdt){ +# dt1 <- copy(inpdt) +# print(names(dt1)); +# print(is.data.table(dt1)) +# dt1 <- dt1[N!=0,] +# } +tabv <- function(...){View(tab(...))} + +mean2sd <- function(x) mean(x)+2*sd(x); + +outlier.tukey <- function(data, x, k=1.5){ + lowerq = unname(quantile(data)[2]); # Q1 + upperq = unname(quantile(data)[4]); # Q3 + iqr = upperq - lowerq # IQR(data) + upper = iqr*k + upperq + lower = lowerq - iqr*k + if (!missing(x)) {return(x>upper | xref.val); +} +#vec1 <- c(189,167,187,183,186,182,181,184,181,177)/1000 +#outlier.Dixon(vec1,.167,0.95) + + +# revPCA: transposes input data and returns PCA #### +revPCA <- function(inpdata, colexcl, sample, ...){ + if (class(colexcl)=="character") {colexcl <- match(colexcl, names(inpdata));} + colexcl <- colexcl[!is.na(colexcl)]; # remove NA + if (class(sample) =="numeric" ) {sample <- names(inpdata)[sample];} + this_df <- data.frame(inpdata)[-(colexcl)]; + row.names(this_df) <- this_df[[sample]]; + this_df[[sample]] <- NULL; + this_df <- t(this_df); + return(prcomp(this_df, ...)); +} + + +# avg(): like mean() but takes all arguments (not just the first one) #### +avg <- function(...){ + mean(c(...)); +} + +cut_my <- function(values,n){ + probs1 <- seq(0,1,1/n); + q_norm <- quantile(values,probs=probs1,na.rm = T) + + if (anyDuplicated(q_norm)>0){ + breaks <- c(0,quantile(values[values!=0],probs=probs1,na.rm = T)) + labs <- 0:(length(breaks)-2) + } else { + breaks <- q_norm; + labs <- 1:n + } + if (anyDuplicated(breaks)) { + warning('Breaks are not unique! '); + labs <- labs[!duplicated(breaks)[-1]] + breaks <- breaks[!duplicated(breaks)]; + #return(NULL) + } + q <- cut(values, breaks = breaks, include.lowest = TRUE, right=FALSE, labels = labs) + return(q); +} + +cut_va0 <- function(values,n) +{ + values[values==0] <- NA + cuts <- cut(values, quantile(values,probs=seq(0,1,1/n),na.rm = T), labels = F, include.lowest = T) + cuts[is.na(cuts)] <- 0 + return(cuts) +} + +add_q <- function(inpDT, inpCols, q=10L, verbose=F){ + inpDT <- copy(inpDT); + for (this.col in inpCols){ + if (verbose==T) cat(this.col,'\n') + if (this.col %!in% names(inpDT)){warning(this.col, ' not found!\n'); next;} + vals <- inpDT[[this.col]] + name_q <- this.col %+% '_q' %+% as.character(q); + qvals <- as.numeric(cut_my(vals, q)); + inpDT[, (name_q):=qvals] + } + return(inpDT) +} + + diff --git a/R/surv.R b/R/surv.R new file mode 100644 index 0000000..d3cdb52 --- /dev/null +++ b/R/surv.R @@ -0,0 +1,131 @@ +getsurvtrends <- function(surv.input, survFormS, cols){ + cols_not_found <- cols %-% names(surv.input); + if (length(cols_not_found)>0) { + warning('Fields not found! ', paste(cols_not_found, collapse = ' ')); + cols <- cols %&% names(surv.input); + } + + #if ('formula' %!in% class(survForm)) survForm <- as.formula(survForm); + + summarize.cox.gene <- function(survFormBase, inp.data, gene){ + survFormX <- as.formula(paste(survFormBase, gene, sep = ' + ')) + summarize.cox(inp.cox = coxph(survFormX, data = inp.data),filtS = gene) + } + + #summarize.cox.gene(survFormS, dt.expr, 'SUSD3') + + rez <- lapply(X = cols, + FUN = function(x) summarize.cox.gene(survFormBase=survFormS, inp.data=surv.input, gene = x)) + + do.call(rbind, rez) + +} + + + +summarize.cox <- function(inp.cox, filtS){ + coefs <- as.data.frame(summary(inp.cox)$coefficients) + confInts <- as.data.frame(summary(inp.cox)$conf.int) + if (nrow(coefs)>1) { + coefs <- coefs[filtS,] + confInts <- confInts[filtS,] + } + coefs$CIl <- confInts[,3]; + coefs$CIh <- confInts[,4]; + setnames(coefs, 'Pr(>|z|)', 'p') + #setnames(rez_tab0, cs('exp(coef) coef se(coef)'), cs('HR lnHR se.lnHR')); + return(coefs); +} + + + +surv.formulaS <- function(inp.time, inp.event, inp.factors=NULL){ + f.str <- paste0('Surv(', inp.time, ', ', inp.event, ') ~ ') + if (!is.null(inp.factors)) f.str <- paste0(f.str, paste(inp.factors, collapse = ' + ')) + return(f.str) +} + +surv.formula <- function(inp.time, inp.event, inp.factors=NULL){ + f.str <- paste0('Surv(', inp.time, ', ', inp.event, ') ~ ') + if (!is.null(inp.factors)) f.str <- paste0(f.str, paste(inp.factors, collapse = ' + ')) + return(as.formula(f.str)) +} + + +# getsurvforthisq #### +# +getsurvforthisq <- function(inp.dataset, surv.form, this.field, this.q){ + if (this.field %!in% names(inp.dataset)) {warning(this.field, ' not found!'); return(NULL);} + if ('this.isHigh' %in% names(inp.dataset)) {stop(' this.isHigh in table fields!'); return(NULL);} + + cat(' ',this.q,': '); + + if (!is.character(surv.form)) surv.form <- deparse(surv.form) + thisG.qvals <- inp.dataset[[this.field]] + inp.dataset <- inp.dataset[!is.na(thisG.qvals),] ; # NEW!!! + thisG.qvals <- inp.dataset[[this.field]] + this.isHigh <- (as.numeric(as.character(thisG.qvals)) > this.q) + cat(' ',sum(this.isHigh, na.rm = T),'; '); + this.cox <- coxph(as.formula(surv.form %+% '+ this.isHigh'), data=inp.dataset); + coefs <- summarize.cox(this.cox,"this.isHighTRUE"); + coefs$Gene <- this.field + coefs$q <- this.q + coefs$nhi <- sum(this.isHigh); + coefs$nlo <- sum(!this.isHigh); + #cat(coefs$p,'!; ') + #dt.rez0 <- ifelse1(is.null(dt.rez0), coefs, rbind(dt.rez0,coefs)); + return(coefs) +} + +myggsurvplot <- function(inp.survfit, legend.title = '', ...){ + ggsurvplot(inp.survfit, + break.time.by = 5, xlab = 'Time (years)', + risk.table = T, risk.table.pos = 'in', risk.table.title = 'Numbers at risk', + tables.col = 'strata', legend.title = legend.title, + ...)} + + + +# build_surv_plot() - previously was in [build_surv_plot function.R] file +#inp.field = 'SUSD3_q10' +#threshold=3L +build_surv_plot <- function(surv.input,inp.field,threshold,form.time,form.evnt,form.cofactors){ + sform1 <- surv.formula(form.time,form.evnt,form.cofactors) + sform2 <- surv.formula(form.time,form.evnt,'this.isHigh') + sform2s <- surv.formulaS(form.time,form.evnt,'this.isHigh') + + #this.isHigh <- surv.input[[inp.field]] > threshold; + vecHigh <- surv.input[[inp.field]] > threshold; + + + coefs <- getsurvforthisq(surv.input, surv.form = deparse(sform1), this.field = inp.field, this.q=threshold) + + surv.input[, this.isHigh:=vecHigh] + + + lab.s <- sprintf('HR = %.1f [%.1f - %.1f]', coefs[,2], coefs$CIl, coefs$CIh) + lab.s <- lab.s %+% ifelse(coefs$p<1e-3, sprintf('\np = %.2e',coefs$p), sprintf('\np = %.3f',coefs$p)) + lab.n <- sprintf('cutoff = %1.0f0%%', coefs$q) + + sf <- survfit(sform2, data=surv.input) + # sf$call$data <- (deparse(substitute(surv.input))); # freaking magic + + sf1 <- survfit(as.formula('Surv(OS_YRS, isBRCA) ~ this.isHigh'), data=surv.input) + sf2 <- survfit(as.formula(sform2s), data=surv.input) + + p <- ggsurvplot(sf1, palette = cs('black red'), + break.time.by = 5, xlab = 'Time (years)', + tables.col = 'strata', risk.table = T, risk.table.pos = 'in', legend = 'none') + + + surv.input[, this.isHigh:=NULL] + + #p$plot <- p$plot + annotate('text', x=mean(p$plot$coordinates$limits$x), y=1, label=this.g, size=10) + + p$plot <- p$plot + annotate('text', x=0, y=0.4, label=lab.s, hjust=0, size=6) + p$plot <- p$plot + annotate('text', x=0, y=0.3, label=lab.n, hjust=0, size=5) + + #p$plot <- p$plot + annotate('text', x=0, y=0.03, label='n.hi = ' %+% sum(this.isHigh), color='red', hjust=0) + #p$plot <- p$plot + annotate('text', x=0, y=0.00, label='n.lo = ' %+% sum(this.isHigh), hjust=0) + return(p) +} diff --git a/R/text.R b/R/text.R new file mode 100644 index 0000000..ad97955 --- /dev/null +++ b/R/text.R @@ -0,0 +1,139 @@ +testab <- function(){ + print ('a' %+% 'b') +} + + + +cs <- function(inputstr, sep=",", fix=T, nonewlines=T){ + if (missing(sep) & grepl(',', inputstr)==F & grepl(' ', inputstr)==T) {sep=" ";} + if (nonewlines) inputstr <- gsub("[\n\r]+", sep, inputstr); + rez <- unlist(strsplit(inputstr, sep, fixed=fix)); + return(rez[rez!=""]); +} + +printcs <- cs1 <- function(input){paste0(cs(input), collapse = ' ')} + + +rightstr <- function(x, n){substr(x, nchar(x)-n+1, nchar(x))} +substrRight <- rightstr + +substr1 <- function(inpStr) {substring(inpStr,1,1)} + +strpad <- function(inpstr,padW,padSide='right'){ + pads <- strrep(' ',padW - nchar(inpstr)); + switch(padSide, right = paste0(inpstr,pads), left = paste0(pads, inpstr)) +} + + +# sub_str <- function(inputstr, start=NA, stop=NA, len=NA){ +# tmp = NA +# if (tmp>0) print(">!") +# } + +# detach("package:vautils", unload=TRUE) +# detach("package:ggplot2", unload=TRUE) +# library(vautils) +# f2() +# detach("package:vautils", unload=TRUE) +# library(ggplot2) +# library(vautils) +# f2() +# + +# "+" = function(x,y) { +# if(is.character(x) || is.character(y)) { +# return(paste(x , y, sep="")) +# } else { +# .Primitive("+")(x,y) +# } +# } + + +`%+%` <- function(...) UseMethod("%+%") +`%+%.character` <- paste0 +`%+%.default` <- function (arg1, arg2){ + e <- parent.env(getEnvByName(.GlobalEnv,'package:vautils')); + if (exists('%+%', envir = e)) get('%+%',envir = e)(arg1,arg2); +} + +# %+%: strings concatenation +# "%+%" <- function(arg1, arg2){ +# if (is.character(arg1) & is.character(arg2)) { +# paste0(arg1, arg2); +# } else { +# e <- parent.env(parent.env(.GlobalEnv)); +# # cat(".G:",environmentName(e),'\n') +# while (environmentName(e) != 'R_EmptyEnv' & !exists('%+%', envir = e)) { +# # print(environmentName(e)) +# e <- parent.env(e); +# } # e. while +# if (environmentName(e) != 'R_EmptyEnv'){ +# old.func <- get('%+%',envir = e) +# old.func(arg1,arg2); +# } +# } # e. else +# } + +prn <- function(...){ # just print(..., collapse=""); - with CRLF at the end + arglist <- list(...); + cat(paste0(unlist(arglist), collapse=""),'\n'); + invisible(NULL); +} + +prnc <- function(...){ # just print(..., collapse=""); - no CRLF at the end + arglist <- list(...); + cat(paste0(unlist(arglist), collapse="")); + invisible(NULL); +} + + +ask <- function(...){ + readline(prompt=paste0(unlist(list(...)), collapse="")); +} + + + +deperc <- function(str_perc){ + return(as.numeric(gsub('([\\d\\.\\+\\-]*)%','\\1',str_perc))/100); +} + + +# removes trailing slash (for dir path) #### +chops <- function(inputstr){ + return(gsub("(.*)/", "\\1", inputstr)); +} + +# chop character +chopLeft <- function(inpstr,n=1L){ + return(substring(inpstr,n+1L)); +} + +chopRight <- function(inpstr,n=1L){ + return(substring(inpstr,1,nchar(inpstr)-n)); +} + + + + +askfilename <- function(fnInput=NULL, allowEmpty=F, prompt=NULL){ + if (!is.null(prompt)) cat(prompt); + + while (is.null(fnInput) || !file.exists(fnInput)) { + cat("File not found:", fnInput); + + if (allowEmpty==T) { + ans <- ask("File not found: ", fnInput, ".\n Enter another file name, or space (\" \") to leave it empty, or press Enter to try again, or ESC to exit: "); + if (ans!="") {fnInput <- ans;} + if (ans==" ") {return("");} + } else { + ans <- ask("File not found: ", fnInput, ".\n Enter another file name, or press Enter to try again, or ESC to exit: "); + if (ans!="") {fnInput <- ans;} + } # end else + } # end while + fnInput <- gsub('\\\\','/',fnInput); + return(fnInput); +} + + +asc <- function(x) { strtoi(charToRaw(x),16L) } +chr <- function(n) { rawToChar(as.raw(n)) } \ No newline at end of file diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..aeaee5f --- /dev/null +++ b/R/utils.R @@ -0,0 +1,27 @@ +# from https://stackoverflow.com/questions/1358003/tricks-to-manage-the-available-memory-in-an-r-session +# improved list of objects +.ls.objects <- function (pos = 1, pattern, order.by, + decreasing=FALSE, head=FALSE, n=5) { + napply <- function(names, fn) sapply(names, function(x) + fn(get(x, pos = pos))) + names <- ls(pos = pos, pattern = pattern) + obj.class <- napply(names, function(x) as.character(class(x))[1]) + obj.mode <- napply(names, mode) + obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class) + obj.size <- napply(names, object.size) + obj.dim <- t(napply(names, function(x) + as.numeric(dim(x))[1:2])) + vec <- is.na(obj.dim)[, 1] & (obj.type != "function") + obj.dim[vec, 1] <- napply(names, length)[vec] + out <- data.frame(obj.type, obj.size, obj.dim) + names(out) <- c("Type", "Size", "Rows", "Columns") + if (!missing(order.by)) + out <- out[order(out[[order.by]], decreasing=decreasing), ] + if (head) + out <- head(out, n) + out +} +# shorthand +lsos <- function(..., n=10) { + .ls.objects(..., order.by="Size", decreasing=TRUE, head=TRUE, n=n) +} \ No newline at end of file diff --git a/R/xml.R b/R/xml.R new file mode 100644 index 0000000..ae502e4 --- /dev/null +++ b/R/xml.R @@ -0,0 +1,15 @@ +my.xpathApply <- function(inpNode, inpPath, delim='; '){ + rez.list <- sapply(inpNode, FUN=xpathApply, path=inpPath, fun=xmlValue) + empty <- sapply(sapply(rez.list, unlist), is.null) + rez.list[empty] <- NA + + rez.list <- sapply(rez.list, unlist) + + nested <- (sapply(rez.list, length) > 1) + + rez.list[nested] <- sapply(rez.list[nested], paste, collapse=delim) + + return(unlist(rez.list)) +} + + diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..157b49c --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,6 @@ +.onLoad <- function(libname, pkgname){ + #xyz <- rnorm(10) ## dummy example + cat('\nzzz.R: Load package: ', pkgname,'from library:',libname,'\n'); + require('data.table'); + require('magrittr'); +} diff --git a/man/hello.Rd b/man/hello.Rd new file mode 100644 index 0000000..0fa7c4b --- /dev/null +++ b/man/hello.Rd @@ -0,0 +1,12 @@ +\name{hello} +\alias{hello} +\title{Hello, World!} +\usage{ +hello() +} +\description{ +Prints 'Hello, world!'. +} +\examples{ +hello() +} diff --git a/test/test1.R b/test/test1.R new file mode 100644 index 0000000..0b4a532 --- /dev/null +++ b/test/test1.R @@ -0,0 +1,22 @@ +# df <- data.frame(x=rnorm(100), y=rnorm(100)) +# +# detach("package:vautils", unload=TRUE) +# detach("package:ggplot2", unload=TRUE) +# detach("package:base64", unload=TRUE) +# +# library(ggplot2) +# library(vautils) +# library(base64) + +# +# 'a' %+% 'b' +# ggplot(data = df, aes(x=x, y=y)) %+% geom_point() +# +# # library(vautils) +# # f2() +# # detach("package:vautils", unload=TRUE) +# +# +# +# # Error: evaluation nested too deeply: infinite recursion / options(expressions=)? +# # Error during wrapup: evaluation nested too deeply: infinite recursion / options(expressions=)? diff --git a/vautils.Rproj b/vautils.Rproj new file mode 100644 index 0000000..497f8bf --- /dev/null +++ b/vautils.Rproj @@ -0,0 +1,20 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source