Migration script added.

ada-discovery · May 22, 2019 · 84e4762 · 84e4762
1 parent 75adbb1
commit 84e4762
Show file tree

Hide file tree

Showing 8 changed files with 369 additions and 2 deletions.
diff --git a/app-migrate/db/db-update-0.6.1 b/app-migrate/db/db-update-0.6.1
@@ -0,0 +1,22 @@
+db.getCollection('dataset_imports').find({'dataView': {$exists: true}}).forEach(function(item)
+{
+    item.dataView.isPrivate = false;
+    db.getCollection('dataset_imports').save(item);
+})
+
+db.getCollection('dictionaries').find().forEach(function(item)
+{
+    for(i = 0; i != item.dataviews.length; ++i)
+    {
+	var dataView = item.dataviews[i];
+ 	dataView.isPrivate = false;
+    }
+
+    for(i = 0; i != item.filters.length; ++i)
+    {
+	var filter = item.filters[i];
+ 	filter.isPrivate = false;
+    }
+
+    db.getCollection('dictionaries').save(item);
+})
diff --git a/app-migrate/db/db-update-0.6.3 b/app-migrate/db/db-update-0.6.3
@@ -0,0 +1,3 @@
+db.getCollection('dataset_imports').updateMany({concreteClass : "models.RedCapDataSetImport"},
+	{ $set: { "eventNames": [] } }
+)
diff --git a/app-migrate/db/db-update-0.6.4 b/app-migrate/db/db-update-0.6.4
@@ -0,0 +1,30 @@
+db.getCollection('dictionaries').find().forEach(function(item)
+{
+    for(i = 0; i != item.dataviews.length; ++i)
+    {
+        var dataView = item.dataviews[i]
+
+	if (dataView.useOptimizedRepoChartCalcMethod) {
+            dataView.generationMethod = "Repo and Full Data"
+        } else {
+            dataView.generationMethod = "Auto"
+        }
+        delete dataView.useOptimizedRepoChartCalcMethod
+    }
+
+    db.getCollection('dictionaries').save(item);
+})
+
+
+db.getCollection('dataset_imports').find({'dataView': {$exists: true}}).forEach(function(item){
+    var dataView = item.dataView
+
+    if (dataView.useOptimizedRepoChartCalcMethod) {
+        dataView.generationMethod = "Repo and Full Data"
+    } else {
+        dataView.generationMethod = "Auto"
+    }
+    delete dataView.useOptimizedRepoChartCalcMethod
+
+    db.getCollection('dataset_imports').save(item);
+})
diff --git a/app-migrate/db/db-update-0.7.0 b/app-migrate/db/db-update-0.7.0
@@ -0,0 +1,49 @@
+db.getCollection('dataset_settings').update({},
+	{$unset:{tranSMARTVisitFieldName:"", tranSMARTReplacements: ""}},
+	{multi: true}
+)
+
+db.getCollection('dictionaries').find().forEach(function(item)
+{
+    var toUpdate = false
+    for(i = 0; i != item.dataviews.length; ++i)
+    {
+	for(j = 0; j != item.dataviews[i].widgetSpecs.length; ++j)
+   	{
+	    var widgetSpec = item.dataviews[i].widgetSpecs[j];
+
+	    switch(widgetSpec.concreteClass) {
+	        case "models.CorrelationWidgetSpec" :
+                    toUpdate = true;
+                    widgetSpec.correlationType = "Pearson";
+                    break;
+	    }
+       }
+    }
+
+    if (toUpdate) {
+        db.getCollection('dictionaries').save(item);
+    }
+})
+
+db.getCollection('dictionaries').find().forEach(function(item)
+{
+    var toUpdate = false
+    for(i = 0; i != item.dataviews.length; ++i)
+    {
+	for(j = 0; j != item.dataviews[i].widgetSpecs.length; ++j)
+   	{
+	    var widgetSpec = item.dataviews[i].widgetSpecs[j];
+
+	    switch(widgetSpec.concreteClass) {
+	        case "models.TemplateHtmlWidgetSpec":
+                    widgetSpec.concreteClass = "models.CustomHtmlWidgetSpec";
+                    break;
+	    }
+       }
+    }
+
+    if (toUpdate) {
+        db.getCollection('dictionaries').save(item);
+    }
+})
diff --git a/app-migrate/db/db-update-0.7.1 b/app-migrate/db/db-update-0.7.1
@@ -0,0 +1,15 @@
+db.getCollection('classifications').find().forEach(function(item)
+{
+	var name = item.concreteClass.substr(item.concreteClass.lastIndexOf(".") + 1)
+        var newClassName = "org.incal.spark_ml.models.classification." + name
+        item.concreteClass = newClassName
+        db.getCollection('classifications').save(item);
+})
+
+db.getCollection('regressions').find().forEach(function(item)
+{
+	var name = item.concreteClass.substr(item.concreteClass.lastIndexOf(".") + 1)
+        var newClassName = "org.incal.spark_ml.models.regression." + name
+        item.concreteClass = newClassName
+        db.getCollection('regressions').save(item);
+})
diff --git a/app-migrate/db/db-update-0.7.2 b/app-migrate/db/db-update-0.7.2
@@ -0,0 +1,141 @@
+db.getCollection('dictionaries').find({'classificationResults': {$exists: true}}).forEach(function(item)
+{
+    var toUpdate = false
+    for(i = 0; i != item.classificationResults.length; ++i)
+    {
+	var result = item.classificationResults[i];
+	if (result["setting-trainingTestingSplit"]) {
+		result["setting-trainingTestSplitRatio"] = result["setting-trainingTestingSplit"]
+		delete result["setting-trainingTestingSplit"];
+		toUpdate = true;	 
+	}
+    }
+
+    if (toUpdate) {
+        db.getCollection('dictionaries').save(item);
+    }
+})
+
+db.getCollection('dictionaries').find({'regressionResults': {$exists: true}}).forEach(function(item)
+{
+    var toUpdate = false
+    for(i = 0; i != item.regressionResults.length; ++i)
+    {
+	var result = item.regressionResults[i];
+	if (result["setting-trainingTestingSplit"]) {
+		result["setting-trainingTestSplitRatio"] = result["setting-trainingTestingSplit"]
+		delete result["setting-trainingTestingSplit"];
+		toUpdate = true;	 
+	}
+    }
+
+    if (toUpdate) {
+        db.getCollection('dictionaries').save(item);
+    }
+})
+
+function replace(result, prefix, newPrefix, name) {
+        const oldName = prefix + "-" + name
+        const newName = newPrefix + "-" + name
+
+        if (result[oldName]) { 
+		print(oldName + " -> " + newName)
+ 		result[newName] = result[oldName]
+ 		delete result[oldName];
+	}
+}
+
+db.getCollection('dictionaries').find({'classificationResults': {$exists: true}}).forEach(function(item)
+{
+    var toUpdate = false
+    for(i = 0; i != item.classificationResults.length; ++i)
+    {
+	var result = item.classificationResults[i];
+	// io spec
+	replace(result, "setting", "runSpec-ioSpec", "inputFieldNames")
+	replace(result, "setting", "runSpec-ioSpec", "outputFieldName")
+	replace(result, "setting", "runSpec-ioSpec", "filterId")
+	replace(result, "setting", "runSpec-ioSpec", "replicationFilterId")
+
+	// ml model
+	replace(result, "setting", "runSpec", "mlModelId")
+
+	// learning setting
+	replace(result, "setting", "runSpec-learningSetting", "featuresNormalizationType")
+	replace(result, "setting", "runSpec-learningSetting", "featuresSelectionNum") 
+	replace(result, "setting", "runSpec-learningSetting", "pcaDims") 
+	replace(result, "setting", "runSpec-learningSetting", "trainingTestSplitRatio") 
+	replace(result, "setting", "runSpec-learningSetting", "samplingRatios") 
+	replace(result, "setting", "runSpec-learningSetting", "repetitions") 
+	replace(result, "setting", "runSpec-learningSetting", "crossValidationFolds") 
+	replace(result, "setting", "runSpec-learningSetting", "crossValidationEvalMetric") 
+	replace(result, "setting", "runSpec-learningSetting", "binCurvesNumBins")
+
+        toUpdate = true;
+    }
+
+    if (toUpdate) {
+        db.getCollection('dictionaries').save(item);
+    }
+})
+
+db.getCollection('dictionaries').find({'regressionResults': {$exists: true}}).forEach(function(item)
+{
+    var toUpdate = false
+    for(i = 0; i != item.regressionResults.length; ++i)
+    {
+	var result = item.regressionResults[i];
+	// io spec
+	replace(result, "setting", "runSpec-ioSpec", "inputFieldNames")
+	replace(result, "setting", "runSpec-ioSpec", "outputFieldName")
+	replace(result, "setting", "runSpec-ioSpec", "filterId")
+	replace(result, "setting", "runSpec-ioSpec", "replicationFilterId")
+
+	// ml model
+	replace(result, "setting", "runSpec", "mlModelId")
+
+	// learning setting
+	replace(result, "setting", "runSpec-learningSetting", "featuresNormalizationType")
+	replace(result, "setting", "runSpec-learningSetting", "outputNormalizationType")
+	replace(result, "setting", "runSpec-learningSetting", "pcaDims") 
+	replace(result, "setting", "runSpec-learningSetting", "trainingTestSplitRatio") 
+	replace(result, "setting", "runSpec-learningSetting", "repetitions") 
+	replace(result, "setting", "runSpec-learningSetting", "crossValidationFolds") 
+	replace(result, "setting", "runSpec-learningSetting", "crossValidationEvalMetric") 
+	replace(result, "setting", "runSpec-learningSetting", "collectOutputs")
+
+	if (!result["runSpec-learningSetting-collectOutputs"]) {
+		result["runSpec-learningSetting-collectOutputs"] = false
+	}
+
+        toUpdate = true;
+    }
+
+    if (toUpdate) {
+        db.getCollection('dictionaries').save(item);
+    }
+})
+
+db.getCollection('dictionaries').find({}).forEach(function(item)
+{
+    var toUpdate = false;
+    if (item.classificationResults) {
+        for(i = 0; i != item.classificationResults.length; ++i)
+        {
+            item.classificationResults[i].concreteClass = "org.incal.spark_ml.models.result.StandardClassificationResult";
+            toUpdate = true
+        }
+    }
+
+    if (item.regressionResults) {
+        for(i = 0; i != item.regressionResults.length; ++i)
+        {
+            item.regressionResults[i].concreteClass = "org.incal.spark_ml.models.result.StandardRegressionResult";
+            toUpdate = true
+        }
+    }
+
+    if (toUpdate) {
+        db.getCollection('dictionaries').save(item);
+    }
+})
diff --git a/app-migrate/migrate.sh b/app-migrate/migrate.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+echo "    ______         __           
+   /      \       |  \          
+  |  ######\  ____| ##  ______  
+  | ##__| ## /      ## |      \ 
+  | ##    ##|  #######  \######\\
+  | ########| ##  | ## /      ##
+  | ##  | ##| ##__| ##|  #######
+  | ##  | ## \##    ## \##    ##
+   \##   \##  \#######  \#######
+---------------------------------
+      MIGRATION ASSISTANT
+---------------------------------
+                                  "
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+bold=$(tput bold)
+normal=$(tput sgr0)
+
+CUR_DIR=$SCRIPT_DIR/../
+APP_CONF_FILE=$CUR_DIR/conf/application.conf
+
+if [ ! -f $APP_CONF_FILE ];
+then
+    echo -e "\n>>> Something is wrong. This script was not run from an Ada installation dir.\n"	
+    exit 1
+fi
+
+APP_VERSION_LINE=$(grep -F "app.version" $APP_CONF_FILE)
+APP_VERSION=${APP_VERSION_LINE#*= }
+
+echo -e "Current Ada version is ${bold}$APP_VERSION${normal}.\n"
+
+if [[ $APP_VERSION < "0.7.0" ]];
+then
+    echo -e ">>> This script can be used only for Ada versions 0.7.0 and above.\n"	
+    exit 1
+fi
+
+# Ask for the previous installation dir until the user confirms one that exists, has a
+# conf/application.conf and carries a version older than the current one.
+PREV_DIR_CORRECT=""
+while [[ $PREV_DIR_CORRECT != "y" && $PREV_DIR_CORRECT != "Y" ]];
+do
+  # PREV_DIR="USER INPUT"
+  # Abort when stdin is closed; otherwise the loop would spin forever on a failing read.
+  read -r -p "Enter the (root) dir of your previous Ada installation: " PREV_DIR || exit 1
+
+  # [[ ]] does not word-split, so an empty answer or a path with spaces is tested correctly
+  # (an unquoted '[ -d $PREV_DIR ]' is true for empty input).
+  if [[ -d $PREV_DIR ]];
+  then
+    PREV_APP_CONF_FILE=$PREV_DIR/conf/application.conf
+    if [ -f "$PREV_APP_CONF_FILE" ];
+    then
+      PREV_APP_VERSION_LINE=$(grep -F "app.version" "$PREV_APP_CONF_FILE")
+      PREV_APP_VERSION=${PREV_APP_VERSION_LINE#*= }
+      echo -e "\n>>> Found the app version ${bold}$PREV_APP_VERSION${normal} in the dir '$PREV_DIR'.\n"
+      # Version-aware comparison: a plain string '>' would order 0.7.0 after 0.10.0.
+      # The current version is newer when the two differ and it sorts last.
+      NEWEST_VERSION=$(printf '%s\n' "$APP_VERSION" "$PREV_APP_VERSION" | sort -V | tail -n 1)
+      if [[ $APP_VERSION != "$PREV_APP_VERSION" && $NEWEST_VERSION == "$APP_VERSION" ]];
+      then
+        # PREV_DIR_CORRECT="USER INPUT"
+        read -r -p "Is it correct [y/n]? " PREV_DIR_CORRECT || exit 1
+      else
+        echo -e ">>> Cannot continue. Your (current) version must be newer than the source one.\n"
+      fi
+    else
+      echo -e "\n>>> Cannot find an application config. The dir '$PREV_DIR' does not seem to belong to an Ada installation.\n"
+    fi
+  else
+    echo -e "\n>>> The dir '$PREV_DIR' does not exist.\n"
+  fi
+done
+
+# Configuration files: copy custom.conf and set_env.sh from the previous installation and
+# carry its ADA_TEMP / ADA_MEM settings over into the current bin/runme.
+
+echo -e "\n>>> Copying configuration files...\n"
+
+# Paths are quoted so that installation dirs containing spaces still work.
+cp "$PREV_DIR/conf/custom.conf" "$CUR_DIR/conf/custom.conf"
+cp "$PREV_DIR/bin/set_env.sh" "$CUR_DIR/bin/set_env.sh"
+
+# -m 1: take the first match only; a multi-line value would break the sed expression.
+PREV_TEMP_LINE=$(grep -m 1 -F "ADA_TEMP=" "$PREV_DIR/bin/runme")
+PREV_MEM_LINE=$(grep -m 1 -F "ADA_MEM=" "$PREV_DIR/bin/runme")
+
+RUNME_FILE=$CUR_DIR/bin/runme
+
+# Substitute only when the previous runme defined the setting; an empty replacement would
+# wipe the current value. The result is written back with 'cat >' so that runme keeps its
+# file mode.
+if [[ -n $PREV_TEMP_LINE ]];
+then
+  sed "s|ADA_TEMP=.*|${PREV_TEMP_LINE}|g" "$RUNME_FILE" > "${RUNME_FILE}_x" && cat "${RUNME_FILE}_x" > "$RUNME_FILE"
+fi
+
+if [[ -n $PREV_MEM_LINE ]];
+then
+  sed "s|ADA_MEM=.*|${PREV_MEM_LINE}|g" "$RUNME_FILE" > "${RUNME_FILE}_x" && cat "${RUNME_FILE}_x" > "$RUNME_FILE"
+fi
+
+rm -f "${RUNME_FILE}_x"
+
+# Copy extra folders, e.g. dataImports, and images: every sub dir of the previous
+# installation except bin, conf, lib and share is copied into the current installation.
+
+cd "$PREV_DIR" || exit 1
+
+echo -e ">>> Searching for extra sub dirs to copy..."
+
+# A glob instead of parsing 'ls' output, so names with spaces survive.
+for i in */;
+do
+  # Without any sub dir the pattern stays unexpanded; skip that literal '*/'.
+  [ -d "$i" ] || continue
+
+  SUB_DIR=${i%%/};
+  if [[ $SUB_DIR != "bin" && $SUB_DIR != "conf" && $SUB_DIR != "lib" && $SUB_DIR != "share" ]];
+  then
+    echo -e "\nCopying '$SUB_DIR'..."
+    # The source is taken relative to the current dir (we are inside PREV_DIR), so a
+    # relative PREV_DIR is not resolved a second time. Copying into the parent dir merges
+    # with an already existing target instead of nesting '$SUB_DIR/$SUB_DIR'.
+    cp -r "./$SUB_DIR" "$CUR_DIR/"
+  fi
+done
+
+# Mongo: the database is not migrated by this script; only a reminder naming the version
+# range of the db-update scripts is printed, followed by the closing messages.
+
+echo -e "\n>>> ${bold}Warning: DB migration has to be done manually. Please execute all the db-update scripts from > ${PREV_APP_VERSION} to ${APP_VERSION}${normal}.\n"
+
+echo -e ">>> Ada migration has been successfully finished!\n"
+
+echo "---------------------------------"
+echo -e "\n[[EnJoY Ada Discovery Analytics. Visit us at https://ada-discovery.org. Bye]]\n"
diff --git a/conf/core/data-access.conf b/conf/core/data-access.conf
@@ -44,6 +44,6 @@ elastic {
   threadpool.index.queue_size: 10000
   threadpool.bulk.size: ${?ADA_ELASTIC_THREADPOOL_BULK_SIZE} //${elasticdb.threadpool.bulk.size}
   threadpool.bulk.queue_size: 1000
-  scroll.batch.size: 200
-  scroll.doc_sort.use: false
+  scroll.batch.size: 10000
+  scroll.doc_sort.use: true
 }