Post postmodeling: A new hope #673

Open · wants to merge 17 commits into base: master
122 changes: 0 additions & 122 deletions src/tests/postmodeling_tests/test_model_evaluator.py

This file was deleted.

74 changes: 0 additions & 74 deletions src/tests/postmodeling_tests/test_model_group_evaluator.py

This file was deleted.

55 changes: 55 additions & 0 deletions src/tests/postmodeling_tests/test_plots.py
@@ -0,0 +1,55 @@
from triage.component.postmodeling import Model, ModelGroup, get_model, get_model_group, create_session
from triage.component.postmodeling.plots import plot_roc, plot_precision_recall_n, plot_metric_over_time
from triage.component.postmodeling.crosstabs import run_crosstabs
from tests.utils import sample_config, populate_source_data, assert_plot_figures_added
from triage.experiments import SingleThreadedExperiment
import pandas as pd
import pytest
import os

@pytest.fixture(scope="module")
def model(shared_db_engine, shared_project_storage):
"""Returns an instantiated Model available at module scope"""
populate_source_data(shared_db_engine)
base_config = sample_config()
# We need an ensemble model to test the plotting functions correctly,
# so we can't use the finished_experiment fixture
base_config['grid_config'] = {
'sklearn.ensemble.ExtraTreesClassifier': {
'n_estimators': [10],
'criterion': ['gini'],
'max_depth': [1],
'max_features': ['sqrt'],
'min_samples_split': [2],
}
}
SingleThreadedExperiment(
base_config,
db_engine=shared_db_engine,
project_path=shared_project_storage.project_path
).run()


session = create_session(shared_db_engine)
Review comment (Member): does this line do anything? (i notice that you're not using session, at least.)

return get_model(1)

@pytest.fixture(scope="module")
def model_group(finished_experiment):
os.environ["DATABASE_URL"] = finished_experiment.db_engine.url
Review comment (Member): rather than pollute the process's actual environment during the test, this can patch it and undo the change:

@pytest.fixture(scope='module')
def model_group(finished_experiment):
    url_environ_patch = {'DATABASE_URL': finished_experiment.db_engine.url}
    with mock.patch.dict(os.environ, url_environ_patch):
        yield get_model_group(1)

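The `mock.patch.dict` pattern the reviewer suggests can be sketched standalone. This is a minimal, self-contained illustration; the environment-variable name and URL below are placeholders, not the real `DATABASE_URL` the fixture would set:

```python
import os
from unittest import mock

# Make the demonstration deterministic: start with the key unset.
os.environ.pop("PM_DEMO_DATABASE_URL", None)

# Placeholder key/value; a real test would patch DATABASE_URL with the
# engine's actual URL.
url_environ_patch = {"PM_DEMO_DATABASE_URL": "postgresql://example/triage"}

with mock.patch.dict(os.environ, url_environ_patch):
    # Inside the block, the variable is visible to any code under test.
    inside = os.environ["PM_DEMO_DATABASE_URL"]

# On exit, patch.dict restores the environment automatically.
outside = os.environ.get("PM_DEMO_DATABASE_URL")
```

Because the patch is undone on exit (even if the test raises), later tests in the same process never see the leaked variable, which is the point of the reviewer's suggestion.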

return get_model_group(1)


def test_plot_metric_over_time(model_group):
with assert_plot_figures_added():
plot_metric_over_time(model_group, metric='precision', parameter='10_pct')


def test_plot_precision_recall_n(model):
with assert_plot_figures_added():
plot_precision_recall_n(model)

def test_plot_roc(model):
with assert_plot_figures_added():
plot_roc(model)
100 changes: 100 additions & 0 deletions src/triage/component/postmodeling/README.org
@@ -0,0 +1,100 @@
#+TITLE: Post-modeling analysis

* Introduction

/Postmodeling/ is about exploring a model group at a point in time
(i.e. a single model) or over time.

Choosing the right model for deployment and exploring its predictions
and behavior over time is a pivotal task. =postmodeling= helps answer
these questions by examining the outcomes of the model and digging
/deeply/ into the model's behavior across time and features.

This library sits at the end of the =triage= pipeline and uses the
output of *audition* and some of its selection rules as its main
input.

* What can you do?

** Compare model groups

*** Metric

*** Overlaps

** Display the model group over time

*** Metric

*** Top Features

*** Top predicted entities

** Drill into a model group at a specific time

*** Top Features

*** Score distribution

*** /Crosstabs/


* New API

We encapsulate the model groups from the database in two objects,
=ModelGroup= and =Model=.

You can manipulate those objects using functions:

- =plot_XXXX(object, **kwargs)= for plotting, located in =plots.py=. The
  arguments for this set of functions are a =model= or a =model_group=, and
  they always return =fig, ax= matplotlib objects so you can modify
  them on the fly and tailor them to your specific purposes.

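The =fig, ax= return pattern can be sketched with plain matplotlib. The =plot_example= function below is a hypothetical stand-in for a =plot_XXXX= function, not part of the library; it only illustrates why returning the figure and axes lets you customize the plot afterwards:

```python
import matplotlib
matplotlib.use("Agg")  # headless backend, safe for scripts and CI
import matplotlib.pyplot as plt


def plot_example():
    """Stand-in for a postmodeling plot_XXXX function: returns fig, ax."""
    fig, ax = plt.subplots()
    ax.plot([0.0, 0.5, 1.0], [0.0, 0.7, 1.0], label="model")
    ax.plot([0, 1], [0, 1], linestyle="--", label="baseline")
    return fig, ax


fig, ax = plot_example()
# Because fig and ax are returned, you can tailor the plot on the fly:
ax.set_title("Tailored after the fact")
ax.legend()
fig.savefig("example.png")
```

The same post-hoc tweaking (titles, legends, axis limits, saving to disk) works on whatever =fig, ax= pair a real plotting function hands back.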
* Use

*NOTE*: This module uses =triage='s /default/ (and recommended) way of
specifying the database connection: the environment variable
=DATABASE_URL=; failing that, it will try to read the connection file
pointed to by =DATABASE_FILE=.

If you are playing with this module in Jupyter notebooks, remember
that you can set environment variables with =%env=.

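In a plain Python script or shell session, the equivalent of the =%env= magic is setting the variable via =os.environ= before using the postmodeling helpers. The URL below is a placeholder; point it at your own database:

```python
import os

# Equivalent of `%env DATABASE_URL=...` in a Jupyter notebook.
# Placeholder connection string; substitute your own credentials/host.
os.environ["DATABASE_URL"] = "postgresql://user:password@localhost:5432/triage"
```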

Most of the time you will need to import these:

#+BEGIN_SRC jupyter-python :session postmodeling
from triage.component.postmodeling import get_model_group, get_model
from triage.component.postmodeling.plots import plot_roc, plot_precision_recall_n, plot_metric_over_time
#+END_SRC

If you want to get a particular =ModelGroup=:

#+BEGIN_SRC jupyter-python :session postmodeling
mg = get_model_group(model_group_id=14)
#+END_SRC

If you want to plot the performance over time:


#+BEGIN_SRC jupyter-python :session postmodeling
## mgs is a list of ModelGroups
plot_metric_over_time(mgs, metric='precision', parameter='10_pct')
#+END_SRC

or get an individual =Model=:

#+BEGIN_SRC jupyter-python :session postmodeling
m = get_model(model_id = 5)
#+END_SRC



#+BEGIN_SRC jupyter-python :session postmodeling
plot_precision_recall_n(m)
#+END_SRC

#+BEGIN_SRC jupyter-python :session postmodeling
plot_roc(m)
#+END_SRC