Skip to content

Commit

Permalink
documentation update
Browse files Browse the repository at this point in the history
-updating vignettes
- updating demos
- removing files that are no longer used
  • Loading branch information
jreps committed Dec 16, 2021
1 parent 4f10c9f commit ec22cbf
Show file tree
Hide file tree
Showing 23 changed files with 1,711 additions and 1,689 deletions.
6 changes: 3 additions & 3 deletions R/SklearnClassifier.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

fitSklearn <- function(trainData,
classifierFunction = 'trainAdaBoost',
fitSklearn <- function(
trainData,
param,
search = "grid",
analysisId,
Expand Down Expand Up @@ -98,7 +98,7 @@ fitSklearn <- function(trainData,
tidyCovariates = attr(trainData$covariateData, "metaData")$tidyCovariateDataSettings,
requireDenseMatrix = attr(param, 'settings')$requiresDenseMatrix,
modelSettings = list(
model = classifierFunction,
model = pySettings$name,
param = param,
finalModelParameters = cvResult$finalParam,
extraSettings = attr(param, 'settings')
Expand Down
63 changes: 41 additions & 22 deletions demo/EnsembleModelDemo.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
library(PatientLevelPrediction)

# We need to have a writable folder for the ff objects
checkffFolder()

# This demo will generate a stacked ensemble consisting
# of a Logistic Regression and Random Forest model.
# Dependent on your system it can take some time to run
Expand All @@ -21,21 +18,18 @@ plpData <- simulatePlpData(
)

# Generate the study population
population <- createStudyPopulation(
plpData,
outcomeId = 2,
populationSettings <- createStudyPopulationSettings(
binary = TRUE,
firstExposureOnly = FALSE,
washoutPeriod = 0,
removeSubjectsWithPriorOutcome = FALSE,
priorOutcomeLookback = 99999,
requireTimeAtRisk = FALSE,
requireTimeAtRisk = TRUE,
minTimeAtRisk = 0,
riskWindowStart = 0,
addExposureDaysToStart = FALSE,
startAnchor = 'cohort start',
riskWindowEnd = 365,
addExposureDaysToEnd = FALSE,
verbosity = "INFO"
endAnchor = 'cohort start'
)

# Let's set the models and model building parameters
Expand All @@ -46,25 +40,50 @@ invisible(readline())
model1 <- setLassoLogisticRegression()
model2 <- setRandomForest()

# Specify a test fraction and a sequence of training set fractions
testFraction <- 0.2
# Specify the split settings
splitSettings <- createDefaultSplitSetting(
testFraction = 0.2,
nfold = 4,
splitSeed = 100 # this makes sure same split is done
)

# Specify the ensemble strategy
ensembleStrategy <- 'stacked'

# Specify the test split to be used
testSplit <- 'person'

# Now we build the stacked ensemble
cat("Press a key to continue")
invisible(readline())
ensembleResults <- PatientLevelPrediction::runEnsembleModel(population,
dataList = list(plpData, plpData),
modelList = list(model1, model2),
testSplit=testSplit,
testFraction=testFraction,
nfold=3, splitSeed=1000,
ensembleStrategy = ensembleStrategy)
ensembleResults <- runEnsembleModel(
ensembleStrategy = ensembleStrategy,
parallel = T,
maxCores = 2,
dataList = list(
plpData,
plpData
),
outcomeIds = list(2,2),
populationSettings = list(
populationSettings,
populationSettings
),
sampleSettings = list(
createSampleSettings(),
createSampleSettings()
),
featureEngineeringSettings = list(
createFeatureEngineeringSettings(),
createFeatureEngineeringSettings()
),
preprocessSettings = list(
createPreprocessSettings(),
createPreprocessSettings()
),
modelList = list(
model1,
model2
),
splitSettings = splitSettings
)

# You could now save the model and apply it on other data as described in more detail
# in the vignette.
76 changes: 37 additions & 39 deletions demo/LearningCurveDemo.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,71 +16,69 @@ plpData <- simulatePlpData(
)

# Create the study population
population <- createStudyPopulation(
plpData,
outcomeId = 2,
populationSettings <- createStudyPopulationSettings(
binary = TRUE,
firstExposureOnly = FALSE,
washoutPeriod = 0,
removeSubjectsWithPriorOutcome = FALSE,
priorOutcomeLookback = 99999,
requireTimeAtRisk = FALSE,
requireTimeAtRisk = TRUE,
minTimeAtRisk = 0,
riskWindowStart = 0,
startAnchor = 'cohort start',
riskWindowEnd = 365,
verbosity = "INFO"
endAnchor = 'cohort start'
)

# Specify the prediction algorithm to be used
modelSettings <- setLassoLogisticRegression()

# Specify a test fraction and a sequence of training set fractions
testFraction <- 0.2
splitSettings <- createDefaultSplitSetting(
testFraction = 0.2,
type = 'stratified'
)
trainEvents <- seq(100, 800, 100)

# Specify the test split to be used
testSplit <- 'stratified'

# Create the learning curve object
if (selection != "y" &&
selection != "Y") {
learningCurve <- createLearningCurve(
population,
plpData = plpData,
modelSettings = modelSettings,
testFraction = testFraction,
verbosity = "TRACE",
plpData = plpData,
outcomeId = 2,
analysisId = 'learningCurveDemo',
parallel = F,
cores = 4,
modelSettings = modelSettings,
populationSettings = populationSettings,
splitSettings = splitSettings,
trainEvents = trainEvents,
splitSeed = 1000
saveDirectory = './learningCurve'
)

# plot the learning curve by specifying one of the available metrics:
# 'AUROC', 'AUPRC', 'sBrier'.
plotLearningCurve(
learningCurve,
metric = "AUROC",
abscissa = "events",
plotTitle = "Learning Curve",
plotSubtitle = "AUROC performance"
)

} else {
# create a learning curve object in parallel
learningCurvePar <- createLearningCurvePar(
population,
plpData = plpData,
modelSettings = modelSettings,
testFraction = 0.2,
learningCurve <- createLearningCurve(
plpData = plpData,
outcomeId = 2,
analysisId = 'learningCurveDemo',
parallel = T,
cores = 4,
modelSettings = modelSettings,
populationSettings = populationSettings,
splitSettings = splitSettings,
trainEvents = trainEvents,
splitSeed = 1000
)

# plot the learning curve
plotLearningCurve(
learningCurvePar,
metric = "AUROC",
abscissa = "events",
plotTitle = "Learning Curve Parallel",
plotSubtitle = "AUROC performance"
saveDirectory = './learningCurve'
)

}

# plot the learning curve
plotLearningCurve(
learningCurve,
metric = "AUROC",
abscissa = "events",
plotTitle = "Learning Curve Parallel",
plotSubtitle = "AUROC performance"
)
63 changes: 35 additions & 28 deletions demo/SingleModelDemo.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,49 @@
library(PatientLevelPrediction)
devAskNewPage(ask = FALSE)

# We need to have a writable folder for the ff objects
checkffFolder()

### Simulated data from a database profile
set.seed(1234)
data(plpDataSimulationProfile)
sampleSize <- 2000
plpData <- PatientLevelPrediction::simulatePlpData(plpDataSimulationProfile, n = sampleSize)
plpData <- simulatePlpData(plpDataSimulationProfile, n = sampleSize)

### Define the study population
population <- PatientLevelPrediction::createStudyPopulation(plpData,
outcomeId = 2,
binary = TRUE,
firstExposureOnly = FALSE,
washoutPeriod = 0,
removeSubjectsWithPriorOutcome = FALSE,
priorOutcomeLookback = 99999,
requireTimeAtRisk = TRUE,
minTimeAtRisk = 0,
riskWindowStart = 0,
addExposureDaysToStart = FALSE,
riskWindowEnd = 365,
addExposureDaysToEnd = FALSE,
verbosity = "INFO")
populationSettings <- createStudyPopulationSettings(
binary = TRUE,
firstExposureOnly = FALSE,
washoutPeriod = 0,
removeSubjectsWithPriorOutcome = FALSE,
priorOutcomeLookback = 99999,
requireTimeAtRisk = TRUE,
minTimeAtRisk = 0,
riskWindowStart = 0,
startAnchor = 'cohort start',
riskWindowEnd = 365,
endAnchor = 'cohort start'
)

### Regularised logistic regression
lr_model <- PatientLevelPrediction::setLassoLogisticRegression()
lr_results <- PatientLevelPrediction::runPlp(population,
plpData,
modelSettings = lr_model,
testSplit = "time",
testFraction = 0.25,
nfold = 2,
verbosity = "INFO",
savePlpPlots = F,
saveDirectory = "./plpmodels")
lr_model <- setLassoLogisticRegression()
lr_results <- runPlp(
plpData = plpData,
outcomeId = 2,
analysisId = 'demo',
analysisName = 'run plp demo',
populationSettings = populationSettings,
splitSettings = createDefaultSplitSetting(
type = "time",
testFraction = 0.25,
nfold = 2
),
sampleSettings = createSampleSettings(),
preprocessSettings = createPreprocessSettings(
minFraction = 0,
normalize = T
),
modelSettings = lr_model,
executeSettings = createDefaultExecuteSettings(),
saveDirectory = "./plpdemo"
)


### Have a look at the results object.
Expand Down
36 changes: 0 additions & 36 deletions extras/PackageMaintenance.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,39 +84,3 @@ rmarkdown::render("vignettes/CreatingShinyApp.Rmd",
toc = TRUE,
number_sections = TRUE))

# Automatically generate the "create arg" functions used for JSON:
rCode <- c("# This file has been autogenerated. Do not change by hand. ")
rCode <- OhdsiRTools::createArgFunction("getPlpData",
excludeArgs = c("connectionDetails",
"cdmDatabaseSchema",
"oracleTempSchema",
"exposureDatabaseSchema",
"exposureTable",
"outcomeDatabaseSchema",
"outcomeTable",
"cdmVersion",
"cohortId",
"outcomeIds"),
rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("createStudyPopulation",
excludeArgs = c("plpData", "population", "outcomeId"),
rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("setModel", excludeArgs = c(), rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("setAdaBoost", excludeArgs = c(), rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("setDecisionTree", excludeArgs = c(), rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("setGradientBoostingMachine",
excludeArgs = c(),
rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("setKNN", excludeArgs = c(), rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("setLassoLogisticRegression",
excludeArgs = c(),
rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("setMLP", excludeArgs = c(), rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("setNaiveBayes", excludeArgs = c(), rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("setRandomForest", excludeArgs = c(), rCode = rCode)
rCode <- OhdsiRTools::createArgFunction("runPlp",
excludeArgs = c("population", "plpData", "modelSettings"),
rCode = rCode)

writeLines(rCode, "r/CreateArgFunctions.R")
OhdsiRTools::formatRFile("r/CreateArgFunctions.R")
Binary file removed extras/PatientLevelPrediction.pdf
Binary file not shown.
Loading

0 comments on commit ec22cbf

Please sign in to comment.