Skip to content

Commit

Permalink
Pull request #33: Feature/230fordashboard (HTTK-196)
Browse files Browse the repository at this point in the history
Merge in HTTK/httk-dev from feature/230fordashboard to dev

* commit '0252dd895cd1b6fcb2b8a0ac7433ca5aa8e88360':
  update the NEWS file to add in the Fabsgut variable change not previously provided in the NEWS file and provide better clarification
  fix typo - Wetmore et al. 2012 and 2015 ref
  Cleaned up dashboard script
  Correctly split RMSLE figures
  New predictions for CCD
  New predictions for CCD
  Split benchmark plots
  Finished running all chemicals
  Loading all QSARs introduces duplicates
  • Loading branch information
jfwambaugh authored and Sarah E. Davidson-Fritz committed Feb 7, 2024
2 parents e52b2ff + 0252dd8 commit 6e3da44
Show file tree
Hide file tree
Showing 8 changed files with 136,732 additions and 123,765 deletions.
4 changes: 4 additions & 0 deletions httk/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ permeability chemicals and therefore predicts only three values
for calculating systemic bioavailability as ***Fbio = Fabs * Fgut * Fhep***
where first-pass hepatic metabolism was already available from
`calc_hep_bioavailability`.
* Changed the name of the variable describing fraction absorbed from the gut
prior to first-pass hepatic metabolism to ***Fabsgut*** to reflect that
***Fabs*** and ***Fgut*** are now modeled separately
(that is, ***Fabsgut = Fabs * Fgut***).
* Integrated ***Fabs*** and ***Fgut*** into oral exposure for all TK models and
integrated into population variability and uncertainty functions within
`invitro_uv`
Expand Down
33 changes: 22 additions & 11 deletions httk/R/benchmark_httk.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,14 @@
#' calculated. If the units are correct the ratio should be 1 (within the
#' precision of the functions -- usually four significant figures). \cr
#'
#' rmsle.plot \tab A ggplot2 figure showing RMSLE tests of various functions.
#' invivo.rmsle.plot \tab A ggplot2 figure comparing
#' model predictions to in vivo measured values.
#' Output generated is the root mean square log10 error for parameters estimated
#' by the package. \cr
#'
#' model.rmsle.plot \tab A ggplot2 figure comparing various functions values
#' against values predicted by other models (chiefly SimCyp predictions from
#' Wetmore et al. 2012 and 2015.
#' Output generated is the root mean square log10 error for parameters estimated
#' by the package. \cr
#'
Expand Down Expand Up @@ -423,13 +430,6 @@ benchmark_httk <- function(
as.numeric(FitData2$Pred.Cmax) /
as.numeric(FitData2$Cmax)))
)

# write.table(FitData[,c(1,3,5,6,29,31)],
# file=paste("invivovcsscheck-",
# packageVersion("httk"),
# ".txt",sep=""),
# row.names=FALSE,
# sep="\t")
}

#
Expand Down Expand Up @@ -616,15 +616,26 @@ benchmark_httk <- function(
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))
benchmarks[["units.plot"]] <- units.plot

model.rmsle.plot <-
ggplot2::ggplot(subset(plot.table, Benchmark %in%
c("RMSLE.Wetmore","RMSLE.noMC")),
aes(x=Version, y=Value, color=Benchmark, group=Benchmark)) +
geom_point() +
geom_line() +
ylab("Root Mean Squared Log10 Error (RMSLE)") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))
benchmarks[["model.rmsle.plot"]] <- model.rmsle.plot

rmsle.plot <-
ggplot2::ggplot(subset(plot.table, regexpr("RMSLE", Benchmark)!=-1),
invivo.rmsle.plot <-
ggplot2::ggplot(subset(plot.table, Benchmark %in%
c("RMSLE.InVivoCss","RMSLE.InVivoAUC",
"RMSLE.InVivoCmax","RMSLE.TissuePC")),
aes(x=Version, y=Value, color=Benchmark, group=Benchmark)) +
geom_point() +
geom_line() +
ylab("Root Mean Squared Log10 Error (RMSLE)") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))
benchmarks[["rmsle.plot"]] <- rmsle.plot
benchmarks[["invivo.rmsle.plot"]] <- invivo.rmsle.plot

count.plot <-
ggplot2::ggplot(subset(plot.table, regexpr("N.", Benchmark)!=-1),
Expand Down
Binary file added models/mod.RENAMEtoEXE
Binary file not shown.
15,765 changes: 8,217 additions & 7,548 deletions scripts/Dashboard-HTTK-CssunitsmgpL-change.txt

Large diffs are not rendered by default.

244,529 changes: 128,372 additions & 116,157 deletions scripts/Dashboard-HTTK-CssunitsmgpL.txt

Large diffs are not rendered by default.

Binary file modified scripts/Dashboard-HTTK-stamp.RData
Binary file not shown.
Binary file modified scripts/calc-mc-css-fold-change.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
166 changes: 117 additions & 49 deletions scripts/httk-for-dashboard.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#R CMD BATCH --vanilla httk-for-dashboard.R

library(httk)
library(parallel)
library(data.table)
Expand Down Expand Up @@ -31,7 +33,7 @@ SPECIES.LIST <- c("Human","Rat")
MODELS.LIST <- c("3compartmentss","PBTK")

# How many processors are available for parallel computing:
NUM.CPU <- 6
NUM.CPU <- detectCores()-1

# Add the in silico predictions:
# Categorical QSPR:
Expand All @@ -43,6 +45,51 @@ load_pradeep2020()
# Caco-2 QSPR:
load_honda2023()

# Find all the duplicates:
# First eliminat NA DTXSIDs (can't do logical tests on them)
chem.physical_and_invitro.data <- subset(chem.physical_and_invitro.data,
!is.na(DTXSID))
# Now check for duplicated DTXSIDs:
dup.chems <- chem.physical_and_invitro.data$DTXSID[
duplicated((chem.physical_and_invitro.data$DTXSID))]
length(dup.chems)
# Now check for duplicated CASRN and grab the DTXSIDs of any chemicals with
# duplicated CAS (but unique DTXSIDs):
dup.chems <- unique(dup.chems,
subset(chem.physical_and_invitro.data, CAS %in%
chem.physical_and_invitro.data$CAS[
duplicated((chem.physical_and_invitro.data$CAS))])$DTXSID)
length(dup.chems)

# Among the duplicates, try to keep the predictions from Dawson 2021:
for (this.chem in dup.chems)
{
these.dup.rows <- which(chem.physical_and_invitro.data$DTXSID==this.chem)
keep.row <- NULL
for(this.row in these.dup.rows)
if (regexpr("Dawson",
chem.physical_and_invitro.data[this.row,
"Human.Clint.Reference"])!=-1
) keep.row <- this.row
these.dup.rows <- these.dup.rows[these.dup.rows!=keep.row]
chem.physical_and_invitro.data <- chem.physical_and_invitro.data[
-these.dup.rows,]
}

# Check to make sure no duplicates left:
chem.physical_and_invitro.data <- subset(chem.physical_and_invitro.data,
!is.na(DTXSID))
dup.chems <- chem.physical_and_invitro.data$DTXSID[
duplicated((chem.physical_and_invitro.data$DTXSID))]
length(dup.chems)
dup.chems <- unique(dup.chems,
subset(chem.physical_and_invitro.data, CAS %in%
chem.physical_and_invitro.data$CAS[
duplicated((chem.physical_and_invitro.data$CAS))])$DTXSID)
# This should be true:
length(dup.chems) == 0


# Organize HTTK data by species:
HTTK.data.list <- list()
all.ids <- NULL
Expand All @@ -62,12 +109,6 @@ for (this.species in SPECIES.LIST)
all.ids <- sort(unique(c(all.ids,HTTK.data.list[[this.species]]$DTXSID)))
}

# temporilay make it run fast:
ivpkfit <- read.csv("invivoPKfit-params.for.dashboard.txt")
short.list <- ivpkfit$Chemical[1:25]
all.ids <- all.ids[all.ids %in% short.list]


# We want one parameter per line, but the code is pretty different wrt how we
# retrieve/calculate these values:
param.list <- c("Clint","Fup","Vd","Days.Css","TK.Half.Life","Css")
Expand All @@ -79,8 +120,6 @@ units.list[["Days.Css"]] <- "Days"
units.list[["TK.Half.Life"]] <- "hours"
units.list[["Css"]] <- "mg/L"



# Function to create all the rows of info for a particular chemical:
make.ccd.table <- function(
this.id,
Expand Down Expand Up @@ -238,33 +277,13 @@ make.ccd.table <- function(
return(dashboard.table)
}

# Create a multicore cluster:
cl <- parallel::makeCluster(detectCores()-1)

# Load httk on all cores:
clusterEvalQ(cl, library(httk))
# Clear memory all cores:
clusterEvalQ(cl, rm(list=ls()))
# Load ADMet predicitons:
clusterEvalQ(cl, load_sipes2017())
# Define the table creator function on all cores:
clusterExport(cl, "make.ccd.table")
# Share data with all cores:
clusterExport(cl, c(
"HTTK.data.list",
"SPECIES.LIST",
"MODELS.LIST",
"param.list",
"units.list",
"all.ids",
"RANDOM.SEED",
"WHICH.QUANTILES",
"NUM.SAMPLES"))

# Create a list with one table per chemical:
dashboard.list <- clusterApply(cl,all.ids,function(x)
make.ccd.table(
this.id=x,
if (NUM.CPU == 1)
{
# Non-parallel version:
dashboard.list <- NULL
for (this.id in all.ids)
dashboard.list[[this.id]] <- make.ccd.table(
this.id=this.id,
HTTK.data.list=HTTK.data.list,
species.list=SPECIES.LIST,
model.list=MODELS.LIST,
Expand All @@ -274,9 +293,60 @@ dashboard.list <- clusterApply(cl,all.ids,function(x)
RANDOM.SEED=RANDOM.SEED,
which.quantiles=WHICH.QUANTILES,
num.samples=NUM.SAMPLES
))
)
} else {
# Create a multicore cluster:
cl <- parallel::makeCluster(NUM.CPU)

# Load httk on all cores:
clusterEvalQ(cl, library(httk))
# Clear memory all cores:
clusterEvalQ(cl, rm(list=ls()))

# Dawson 2021:
clusterEvalQ(cl, load_dawson2021())
# ADmet Predictor:
clusterEvalQ(cl, load_sipes2017())
# Machine learning model:
clusterEvalQ(cl, load_pradeep2020())
# Caco-2 QSPR:
clusterEvalQ(cl, load_honda2023())

# Define the table creator function on all cores:
clusterExport(cl, "make.ccd.table")
# Share data with all cores:
clusterExport(cl, c(
"chem.physical_and_invitro.data",
"HTTK.data.list",
"SPECIES.LIST",
"MODELS.LIST",
"param.list",
"units.list",
"all.ids",
"RANDOM.SEED",
"WHICH.QUANTILES",
"NUM.SAMPLES"))

# Create a list with one table per chemical:
dashboard.list <- clusterApply(cl,
all.ids,
function(x)
make.ccd.table(
this.id=x,
HTTK.data.list=HTTK.data.list,
species.list=SPECIES.LIST,
model.list=MODELS.LIST,
param.list=param.list,
units.list=units.list,
all.ids=all.ids,
RANDOM.SEED=RANDOM.SEED,
which.quantiles=WHICH.QUANTILES,
num.samples=NUM.SAMPLES
))

stopCluster(cl)
}

stopCluster(cl)
# Combine all the individual tables into a single table:
dashboard.table <- rbindlist(dashboard.list)

Expand Down Expand Up @@ -355,11 +425,11 @@ for (this.parameter in c(
ivpkfit[ivpkfit.row,"Ref"]
dashboard.table[dashboard.row,"Measured"] <- ivpkfit[ivpkfit.row,
this.parameter]
}
}
}
}
}
}
}


#
#
Expand Down Expand Up @@ -495,20 +565,18 @@ png("calc-mc-css-fold-change.png")
print(Fig)
dev.off()

# Duplicate column names in the two merged tables (dashboard.table, prev.table)
# have .x and .y assigned to them respectively.
tmp <- tmp[,colnames(tmp)[c(1:5,7:8,12:13,16)]]
colnames(tmp) <- gsub("x","prev",colnames(tmp))
colnames(tmp) <- gsub("y","new",colnames(tmp))
colnames(tmp) <- gsub("x","new",colnames(tmp))
colnames(tmp) <- gsub("y","prev",colnames(tmp))
write.table(
tmp,
file=paste(
"Dashboard-HTTK-CssunitsmgpL-change.txt",sep=""),
row.names=F,
quote=F,
sep="\t")







sessionInfo()

0 comments on commit 6e3da44

Please sign in to comment.