diff --git a/.gitignore b/.gitignore index a806e4a..1eb45da 100644 --- a/.gitignore +++ b/.gitignore @@ -257,10 +257,7 @@ $RECYCLE.BIN/ # Files created by data_ingestion or visualization modeling/features -modeling/results*/*.pt -modeling/results*/*.csv -modeling/results*/*.txt -modeling/results*/*.yml +modeling/results* modeling/vectorized/* modeling/html diff --git a/app.py b/app.py index 87e84fa..fdf9901 100644 --- a/app.py +++ b/app.py @@ -79,8 +79,10 @@ def _annotate_timepoints(self, mmif: Mmif, **parameters) -> Mmif: all_positions = [] t = time.perf_counter() # in the following, the .glob() should always return only one, otherwise we have a problem + ## naming convention from train.py + gridsearch.py = {timestamp}.{backbonename}.{prebinname}.pos{T/F}.pt + ## right now, `prebinname` is fixed to `nomap` as we don't use prebinning model_filestem = next(default_model_storage.glob( - f"*.{parameters['tpModelName']}.pos{'T' if parameters['tpUsePosModel'] else 'F'}.pt")).stem + f"*.{parameters['tpModelName']}.*.pos{'T' if parameters['tpUsePosModel'] else 'F'}.pt")).stem self.logger.info(f"Initiating classifier with {model_filestem}") classifier = classify.Classifier(default_model_storage / model_filestem, self.logger.name if self.logger.isEnabledFor(logging.DEBUG) else None) @@ -123,6 +125,8 @@ def _annotate_timepoints(self, mmif: Mmif, **parameters) -> Mmif: timepoint_annotation.add_property('classification', classification) def _annotate_timeframes(self, mmif: Mmif, **parameters) -> Mmif: + from modeling.config import bins + TimeFrameTuple = namedtuple('TimeFrame', ['label', 'tf_score', 'targets', 'representatives']) tp_view = mmif.get_view_contains(AnnotationTypes.TimePoint) @@ -152,6 +156,13 @@ def _annotate_timeframes(self, mmif: Mmif, **parameters) -> Mmif: src_labels = sqh.validate_labelset(tps) # TODO: fill in `tfLabelMap` parameter value if a preset is used by the user + # first fill in labelMap parameter value if a preset is used by the user + label_map = bins.binning_schemes.get(parameters['tfLabelMapPreset']) + if label_map is None: + label_map = parameters['tfLabelMap'] + else: + label_map = {lbl: binname for binname, lbls in label_map.items() for lbl in lbls} + parameters['tfLabelMap'] = label_map self.logger.debug(f"Label map: {parameters['tfLabelMap']}") label_remapper = sqh.build_label_remapper(src_labels, parameters['tfLabelMap']) diff --git a/metadata.py b/metadata.py index cb978b9..a2d8dc3 100644 --- a/metadata.py +++ b/metadata.py @@ -10,6 +10,7 @@ from mmif import DocumentTypes, AnnotationTypes from modeling import FRAME_TYPES +import modeling.config.bins default_model_storage = Path(__file__).parent / 'modeling/models' @@ -26,16 +27,6 @@ def appmetadata() -> AppMetadata: available_models = default_model_storage.glob('*.pt') - # This was the most frequent label mapping from the old configuration file, - # which had default mappings for each model. - labelMap = [ - "B:bars", - "S:slate", - "I:chyron", "N:chyron", "Y:chyron", - "C:credits", "R:credits", - "W:other_opening", "L:other_opening", "O:other_opening", "M:other_opening", - "E:other_text", "K:other_text", "G:other_text", "T:other_text", "F:other_text"] - metadata = AppMetadata( name="Scenes-with-text Detection", description="Detects scenes with text, like slates, chyrons and credits. 
" @@ -57,36 +48,42 @@ def appmetadata() -> AppMetadata: metadata.add_parameter( name='useClassifier', type='boolean', default=True, - description='Use the image classifier model to generate TimePoint annotations') + description='Use the image classifier model to generate TimePoint annotations.') metadata.add_parameter( name='tpModelName', type='string', - default='convnext_lg', + default='convnext_small', choices=list(set(m.stem.split('.')[1] for m in available_models)), - description='model name to use for classification, only applies when `useClassifier=true`') + description='Model name to use for classification, only applies when `useClassifier=true`.') metadata.add_parameter( name='tpUsePosModel', type='boolean', default=True, - description='Use the model trained with positional features, only applies when `useClassifier=true`') + description='Use the model trained with positional features, only applies when `useClassifier=true`.') metadata.add_parameter( name='tpStartAt', type='integer', default=0, - description='Number of milliseconds into the video to start processing, only applies when `useClassifier=true`') + description='Number of milliseconds into the video to start processing, only applies when `useClassifier=true`.') metadata.add_parameter( name='tpStopAt', type='integer', default=sys.maxsize, - description='Number of milliseconds into the video to stop processing, only applies when `useClassifier=true`') + description='Number of milliseconds into the video to stop processing, only applies when `useClassifier=true`.') metadata.add_parameter( name='tpSampleRate', type='integer', default=1000, - description='Milliseconds between sampled frames, only applies when `useClassifier=true`') + description='Milliseconds between sampled frames, only applies when `useClassifier=true`.') metadata.add_parameter( name='useStitcher', type='boolean', default=True, - description='Use the stitcher after classifying the TimePoints') + description='Use the stitcher after classifying the TimePoints.') metadata.add_parameter( name='tfMinTPScore', type='number', default=0.5, - description='Minimum score for a TimePoint to be included in a TimeFrame, only applies when `useStitcher=true`') + description='Minimum score for a TimePoint to be included in a TimeFrame. ' + 'A lower value will include more TimePoints in the TimeFrame ' + '(increasing recall in exchange for precision). ' + 'Only applies when `useStitcher=true`.') metadata.add_parameter( name='tfMinTFScore', type='number', default=0.9, - description='Minimum score for a TimeFrame, only applies when `useStitcher=true`') + description='Minimum score for a TimeFrame. ' + 'A lower value will include more TimeFrames in the output ' + '(increasing recall in exchange for precision). ' + 'Only applies when `useStitcher=true`') metadata.add_parameter( name='tfMinTFDuration', type='integer', default=5000, - description='Minimum duration of a TimeFrame in milliseconds, only applies when `useStitcher=true`') + description='Minimum duration of a TimeFrame in milliseconds, only applies when `useStitcher=true`.') metadata.add_parameter( name='tfAllowOverlap', type='boolean', default=False, description='Allow overlapping time frames, only applies when `useStitcher=true`') @@ -96,17 +93,28 @@ def appmetadata() -> AppMetadata: 'multiple representative points to follow any changes in the scene. 
' 'Only applies when `useStitcher=true`') metadata.add_parameter( - # TODO: do we want to use the old default labelMap from the configuration here or - # do we truly want an empty mapping and use the pass-through, as hinted at in the - # description (which is now not in sync with the code). - name='tfLabelMap', type='map', default=labelMap, + name='tfLabelMap', type='map', default=[], description=( - 'Mapping of a label in the input annotations to a new label. Must be formatted as ' - 'IN_LABEL:OUT_LABEL (with a colon). To pass multiple mappings, use this parameter ' - 'multiple times. By default, all the input labels are passed as is, including any ' - 'negative labels (with default value being no remapping at all). However, when ' - 'at least one label is remapped, all the other "unset" labels are discarded as ' - 'a negative label. Only applies when `useStitcher=true`')) + '(See also `tfLabelMapPreset`; set `tfLabelMapPreset=nopreset` to ensure that a preset does not ' + 'override `tfLabelMap` when using this parameter.) Mapping of a label in the input TimePoint annotations to a new ' + 'label of the stitched TimeFrame annotations. Must be formatted as IN_LABEL:OUT_LABEL (with a colon). To ' + 'pass multiple mappings, use this parameter multiple times. When two or more TP labels are mapped to one TF ' + 'label, this effectively works as a "binning" operation. If no mapping is used, all input labels are ' + 'passed through, meaning the TP and TF labelsets stay identical. However, when at least one label is mapped, ' + 'all other "unset" labels are mapped to the negative label (`-`), and if `-` does not exist in the TF ' + 'labelset, it is added automatically. ' + 'Only applies when `useStitcher=true`.')) + labelMapPresetsReformat = {schname: str([f'`{lbl}`:`{binname}`' + for binname, lbls in scheme.items() + for lbl in lbls]) + for schname, scheme in modeling.config.bins.binning_schemes.items()} + labelMapPresetsMarkdown = '\n'.join([f"- `{k}`: {v}" for k, v in labelMapPresetsReformat.items()]) + metadata.add_parameter( + name='tfLabelMapPreset', type='string', default='relaxed', + choices=list(modeling.config.bins.binning_schemes.keys()), + description=f'(See also `tfLabelMap`) Preset alias of a label mapping. If not `nopreset`, this parameter will ' + f'override the `tfLabelMap` parameter. Available presets are:\n{labelMapPresetsMarkdown}\n\n ' + f'Only applies when `useStitcher=true`.') return metadata diff --git a/modeling/__init__.py b/modeling/__init__.py index 7d4f9a5..8f7d265 100644 --- a/modeling/__init__.py +++ b/modeling/__init__.py @@ -2,10 +2,14 @@ positive_label = '+' # full typology from https://github.com/clamsproject/app-swt-detection/issues/1 -FRAME_TYPES = ["B", "S", "W", "L", "O", - "M", "I", "N", "E", "P", "Y", "K", "G", "T", "F", "C", "R"] -FRAME_TYPES_WITH_SUBTYPES = ["B", "SH", "SC", "SD", "SB", "SG", "W", "L", "O", - "M", "I", "N", "E", "P", "Y", "K", "G", "T", "F", "C", "R"] +FRAME_TYPES = [ + "B", "S", "I", "C", "R", "M", "O", "W", + "N", "Y", "U", "K", + "L", "G", "F", "E", "T", + "P", +] +FRAME_TYPES_WITH_SUBTYPES = FRAME_TYPES.copy() + ['SH', 'SC', 'SD', 'SB', 'SG'] +FRAME_TYPES_WITH_SUBTYPES.remove('S') # These are time frames that are typically static (that is, the text does not # move around or change as with rolling credits). These are frame names after
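The postbin flow added to `_annotate_timeframes` in app.py looks up the preset named by `tfLabelMapPreset` in `modeling.config.bins.binning_schemes` and inverts it into the flat IN_LABEL:OUT_LABEL map that the stitcher consumes. A minimal sketch of that inversion, for illustration only, using the `relaxed` scheme defined in `modeling/config/bins.py` later in this diff:

# Illustration only (not part of the patch): invert a binning scheme into the
# flat label map, mirroring the dict comprehension added in app.py.
relaxed = {
    "Bars": ["B"],
    "Slate": ["S", "S:H", "S:C", "S:D", "S:B", "S:G"],
    "Chyron": ["Y", "U", "K", "I", "N"],
    "Credits": ["C", "R"],
    "Other-text": ["M", "O", "W", "L", "G", "F", "E", "T"],
}
label_map = {lbl: binname for binname, lbls in relaxed.items() for lbl in lbls}
assert label_map["I"] == "Chyron" and label_map["R"] == "Credits"
# any TimePoint label not covered by the preset is later remapped to the negative label '-'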
These are frame names after diff --git a/modeling/backbones.py b/modeling/backbones.py index 6180fc6..38a3f11 100644 --- a/modeling/backbones.py +++ b/modeling/backbones.py @@ -46,16 +46,6 @@ class ExtractorModel: # TODO/REVIEW - do we want to be able to change the weight versions (IMAGENET1K_V1 etc) # ==========================================| # ConvNext Models -class ConvnextBaseExtractor(ExtractorModel): - name = "convnext_base" - dim = 1024 - - def __init__(self): - self.model = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1) - self.model.classifier[-1] = torch.nn.Identity() - self.preprocess = ConvNeXt_Base_Weights.IMAGENET1K_V1.transforms() - - class ConvnextTinyExtractor(ExtractorModel): name = "convnext_tiny" dim = 768 @@ -76,6 +66,16 @@ def __init__(self): self.preprocess = ConvNeXt_Small_Weights.IMAGENET1K_V1.transforms() +class ConvnextBaseExtractor(ExtractorModel): + name = "convnext_base" + dim = 1024 + + def __init__(self): + self.model = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1) + self.model.classifier[-1] = torch.nn.Identity() + self.preprocess = ConvNeXt_Base_Weights.IMAGENET1K_V1.transforms() + + class ConvnextLargeExtractor(ExtractorModel): name = "convnext_lg" dim = 1536 diff --git a/modeling/classify.py b/modeling/classify.py index 0f29079..5ac433b 100644 --- a/modeling/classify.py +++ b/modeling/classify.py @@ -20,7 +20,7 @@ def __init__(self, model_stem, logger_name=None): model_config_file = f"{model_stem}.yml" model_checkpoint = f"{model_stem}.pt" model_config = yaml.safe_load(open(model_config_file)) - self.training_labels = train.pretraining_binned_label(model_config) + self.training_labels = train.get_prebinned_labelset(model_config) self.featurizer = data_loader.FeatureExtractor(**model_config) label_count = len(FRAME_TYPES) + 1 if 'bins' in model_config: diff --git a/modeling/config/__init__.py b/modeling/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modeling/config/batches.py b/modeling/config/batches.py new file mode 100644 index 0000000..b37d387 --- /dev/null +++ b/modeling/config/batches.py @@ -0,0 +1,146 @@ +# training batches see https://github.com/clamsproject/aapb-annotations/tree/main/batches for more details +unintersting_guids = ["cpb-aacip-254-75r7szdz"] # the most "uninteresting" video (88/882 frames annotated) +aapb_collaboration_27_a = [ + "cpb-aacip-129-88qc000k", + "cpb-aacip-f2c34dd1cd4", + "cpb-aacip-191-40ksn47s", + "cpb-aacip-507-028pc2tp2z", + "cpb-aacip-507-0k26970f2d", + "cpb-aacip-507-0z70v8b17g", + "cpb-aacip-512-542j67b12n", + "cpb-aacip-394-149p8fcw", + "cpb-aacip-08fb0e1f287", + "cpb-aacip-512-t43hx1753b", + "cpb-aacip-d0f2569e145", + "cpb-aacip-d8ebafee30e", + "cpb-aacip-c72fd5cbadc", + "cpb-aacip-b6a2a39b7eb", + "cpb-aacip-512-4b2x34nv4t", + "cpb-aacip-512-416sx65d21", + "cpb-aacip-512-3f4kk95f7h", + "cpb-aacip-512-348gf0nn4f", + "cpb-aacip-516-cc0tq5s94c", + "cpb-aacip-516-8c9r20sq57", +] +aapb_collaboration_27_b = [ + "cpb-aacip-254-75r7szdz", + "cpb-aacip-259-4j09zf95", + "cpb-aacip-526-hd7np1xn78", + "cpb-aacip-75-72b8h82x", + "cpb-aacip-fe9efa663c6", + "cpb-aacip-f5847a01db5", + "cpb-aacip-f2a88c88d9d", + "cpb-aacip-ec590a6761d", + "cpb-aacip-c7c64922fcd", + "cpb-aacip-f3fa7215348", + "cpb-aacip-f13ae523e20", + "cpb-aacip-e7a25f07d35", + "cpb-aacip-ce6d5e4bd7f", + "cpb-aacip-690722078b2", + "cpb-aacip-e649135e6ec", + "cpb-aacip-15-93gxdjk6", + "cpb-aacip-512-4f1mg7h078", + "cpb-aacip-512-4m9183583s", + "cpb-aacip-512-4b2x34nt7g", + "cpb-aacip-512-3n20c4tr34", + 
"cpb-aacip-512-3f4kk9534t", +] +aapb_collaboration_27_c = [ + "cpb-aacip-0d338c39a45", + "cpb-aacip-0acac5e9db7", + "cpb-aacip-0bdc7c8ecc5", + "cpb-aacip-1032b1787b4", + "cpb-aacip-516-qf8jd4qq96", + "cpb-aacip-259-kh0dzd78", + "cpb-aacip-259-nc5sb374", + "cpb-aacip-259-mw28cq94", + "cpb-aacip-259-mc8rg22j", + "cpb-aacip-259-5717pw8g", + "cpb-aacip-259-pr7msz5c", + "cpb-aacip-259-g737390m", + "cpb-aacip-259-pc2t780t", + "cpb-aacip-259-q814r90k", + "cpb-aacip-259-cz325478", + "cpb-aacip-259-vh5cgj9t", + "cpb-aacip-259-gt5ff704", + "cpb-aacip-259-gx44t714", + "cpb-aacip-259-pr7msz3w", + "cpb-aacip-259-zg6g5589", +] +aapb_collaboration_27_d = [ + "cpb-aacip-259-wh2dcb8p" +] # this is kept for evaluation set, should not be used for training!!! + +# new image-level annotation added after v6.1 +# "challenging images" from later annotation (`bm` set and `pbd` set, 60 videos, 2024 summer) +# recorded as `aapb-collaboration-27-e` in the annotation repo +guids_with_challenging_images_bm = [ + "cpb-aacip-00a9ed7f2ba", + "cpb-aacip-0ace30f582d", + "cpb-aacip-0ae98c2c4b2", + "cpb-aacip-0b0c0afdb11", + "cpb-aacip-0bb992d2e7f", + "cpb-aacip-0c0374c6c55", + "cpb-aacip-0c727d4cac3", + "cpb-aacip-0c74795718b", + "cpb-aacip-0cb2aebaeba", + "cpb-aacip-0d74af419eb", + "cpb-aacip-0dbb0610457", + "cpb-aacip-0dfbaaec869", + "cpb-aacip-0e2dc840bc6", + "cpb-aacip-0ed7e315160", + "cpb-aacip-0f3879e2f22", + "cpb-aacip-0f80359ada5", + "cpb-aacip-0f80a4f5ed2", + "cpb-aacip-0fe3e4311e1", + "cpb-aacip-1a365705273", + "cpb-aacip-1b295839145", +] +guids_with_challenging_images_pbd = [ + "cpb-aacip-110-16c2ftdq", + "cpb-aacip-120-1615dwkg", + "cpb-aacip-120-203xsm67", + "cpb-aacip-15-70msck27", + "cpb-aacip-16-19s1rw84", + "cpb-aacip-17-07tmq941", + "cpb-aacip-17-58bg87rx", + "cpb-aacip-17-65v6xv27", + "cpb-aacip-17-81jhbz0g", + "cpb-aacip-29-61djhjcx", + "cpb-aacip-29-8380gksn", + "cpb-aacip-41-322bvxmn", + "cpb-aacip-41-42n5tj3d", + "cpb-aacip-110-35gb5r94", + "cpb-aacip-111-655dvd99", + "cpb-aacip-120-19s1rrsp", + "cpb-aacip-120-31qfv097", + "cpb-aacip-120-73pvmn2q", + "cpb-aacip-120-80ht7h8d", + "cpb-aacip-120-8279d01c", + "cpb-aacip-120-83xsjcb2", + "cpb-aacip-17-88qc0md1", + "cpb-aacip-35-36tx99h9", + "cpb-aacip-42-78tb31b1", + "cpb-aacip-52-84zgn1wb", + "cpb-aacip-52-87pnw5t0", + "cpb-aacip-55-84mkmvwx", + "cpb-aacip-75-13905w9q", + "cpb-aacip-75-54xgxnzg", + "cpb-aacip-77-02q5807j", + "cpb-aacip-77-074tnfhr", + "cpb-aacip-77-1937qsxt", + "cpb-aacip-77-214mx491", + "cpb-aacip-77-24jm6zc8", + "cpb-aacip-77-35t77b2v", + "cpb-aacip-77-44bp0mdh", + "cpb-aacip-77-49t1h3fv", + "cpb-aacip-77-81jhbv89", + "cpb-aacip-83-074tmx7h", + "cpb-aacip-83-23612txx", +] +aapb_collaboration_27_e = guids_with_challenging_images_bm + guids_with_challenging_images_pbd + +# this `pbd` subset contains 40 videos with 15328 (non-transitional) + 557 (transitional) = 15885 frames +# then updated with more annotations 19331 (non-transitional) + 801 (transitional) = 20132 frames +# we decided to use this subset for the fixed validation set (#116) +guids_for_fixed_validation_set = guids_with_challenging_images_pbd diff --git a/modeling/config/bins.py b/modeling/config/bins.py new file mode 100644 index 0000000..149946c --- /dev/null +++ b/modeling/config/bins.py @@ -0,0 +1,75 @@ +from typing import List, Dict + +nobinning = {} + +label_bins = { + "Bars": ["B"], + "Chyron-other": ["Y", "U", "K"], + "Chyron-person": ["I", "N"], + "Credits": ["C", "R"], + "Main": ["M"], + "Opening": ["O", "W"], + "Slate": ["S", "S:H", "S:C", "S:D", "S:B", "S:G"], + 
"Other-text-sm": ["L", "G", "F", "E", "T"], + "Other-text-md": ["M", "O", "W", "L", "G", "F", "E", "T"], + "Other-text-lg": ["M", "O", "W", "Y", "U", "K", "L", "G", "F", "E", "T"], +} + +binning_schemes: Dict[str, Dict[str, List[str]]] = { + "noprebin": nobinning, + "nomap": nobinning, + + "strict": { + "Bars": label_bins["Bars"], + "Slate": label_bins["Slate"], + "Chyron-person": label_bins["Chyron-person"], + "Credits": label_bins["Credits"], + "Main": label_bins["Main"], + "Opening": label_bins["Opening"], + "Chyron-other": label_bins["Chyron-other"], + "Other-text": label_bins["Other-text-sm"], + }, + + "simpler": { + "Bars": label_bins["Bars"], + "Slate": label_bins["Slate"], + "Chyron": label_bins["Chyron-person"], + "Credits": label_bins["Credits"], + }, + + "simple": { + "Bars": label_bins["Bars"], + "Slate": label_bins["Slate"], + "Chyron-person": label_bins["Chyron-person"], + "Credits": label_bins["Credits"], + "Other-text": label_bins["Other-text-lg"], + }, + + "relaxed": { + "Bars": label_bins["Bars"], + "Slate": label_bins["Slate"], + "Chyron": label_bins["Chyron-other"] + label_bins["Chyron-person"], + "Credits": label_bins["Credits"], + "Other-text": label_bins["Other-text-md"], + }, + + "binary-bars": { + "Bars": label_bins["Bars"], + }, + + "binary-slate": { + "Slate": label_bins["Slate"], + }, + + "binary-chyron-strict": { + "Chyron-person": label_bins["Chyron-person"], + }, + + "binary-chyron-relaxed": { + "Chyron": label_bins["Chyron-other"] + label_bins["Chyron-person"], + }, + + "binary-credits": { + "Credits": label_bins["Credits"], + } +} diff --git a/modeling/data_loader.py b/modeling/data_loader.py index 657b360..25abb1e 100644 --- a/modeling/data_loader.py +++ b/modeling/data_loader.py @@ -212,6 +212,8 @@ def get_stills(self, media_path: Union[os.PathLike, str], label=row[2], subtype_label=row[3], mod=row[4].lower() == 'true') for row in reader if row[1] == 'true'] + frame_list.sort(key=lambda x: x.filename) + print(f'found {len(frame_list)} annotated frames') # CSV rows with mod=True should be discarded (taken as "unseen") # maybe we can throw away the video with the least (88) frames annotation from B2 # to make 20/20 split on dense vs sparse annotation diff --git a/modeling/evaluate.py b/modeling/evaluate.py deleted file mode 100644 index f88c92a..0000000 --- a/modeling/evaluate.py +++ /dev/null @@ -1,60 +0,0 @@ -import csv -import logging -import sys -from collections import defaultdict -from pathlib import Path -from typing import IO, List - -import torch -from torch import Tensor -from torchmetrics import functional as metrics -from torchmetrics.classification import BinaryAccuracy, BinaryPrecision, BinaryRecall, BinaryF1Score - - -def evaluate(model, valid_loader, labelset, export_fname=None): - model.eval() - # valid_loader is currently expected to be a single batch - vfeats, vlabels = next(iter(valid_loader)) - outputs = model(vfeats) - _, preds = torch.max(outputs, 1) - p = metrics.precision(preds, vlabels, 'multiclass', num_classes=len(labelset), average='macro') - r = metrics.recall(preds, vlabels, 'multiclass', num_classes=len(labelset), average='macro') - f = metrics.f1_score(preds, vlabels, 'multiclass', num_classes=len(labelset), average='macro') - # m = metrics.confusion_matrix(preds, vlabels, 'multiclass', num_classes=len(labelset)) - - if not export_fname: - export_f = sys.stdout - else: - path = Path(export_fname) - path.parent.mkdir(parents=True, exist_ok=True) - export_f = open(path, 'w', encoding='utf8') - export_train_result(out=export_f, 
preds=preds, golds=vlabels, - labelset=labelset, img_enc_name=valid_loader.dataset.img_enc_name) - logging.info(f"Exported to {export_f.name}") - return p, r, f - - -def export_train_result(out: IO, preds: Tensor, golds: Tensor, labelset: List[str], img_enc_name: str): - """Exports the data into a human-readable format. - """ - - label_metrics = defaultdict(dict) - - for i, label in enumerate(labelset): - pred_labels = torch.where(preds == i, 1, 0) - true_labels = torch.where(golds == i, 1, 0) - binary_acc = BinaryAccuracy() - binary_prec = BinaryPrecision() - binary_recall = BinaryRecall() - binary_f1 = BinaryF1Score() - label_metrics[label] = {"Model_Name": img_enc_name, - "Label": label, - "Accuracy": binary_acc(pred_labels, true_labels).item(), - "Precision": binary_prec(pred_labels, true_labels).item(), - "Recall": binary_recall(pred_labels, true_labels).item(), - "F1-Score": binary_f1(pred_labels, true_labels).item()} - - writer = csv.DictWriter(out, fieldnames=["Model_Name", "Label", "Accuracy", "Precision", "Recall", "F1-Score"]) - writer.writeheader() - for label, metrics in label_metrics.items(): - writer.writerow(metrics) diff --git a/modeling/gridsearch.py b/modeling/gridsearch.py index 8a49ab4..96a519e 100644 --- a/modeling/gridsearch.py +++ b/modeling/gridsearch.py @@ -1,55 +1,74 @@ import itertools +import math import modeling.backbones +import modeling.config.bins +from modeling.config.batches import unintersting_guids, aapb_collaboration_27_b, aapb_collaboration_27_c, \ + aapb_collaboration_27_e + +## TP classifier training grid search # parameter values from the best performing models in v5.0 -num_splits = {1} +split_size = {math.inf} num_epochs = {10} -num_layers = {2} -pos_length = {6000000} +num_layers = {4} pos_unit = {60000} -dropouts = {0.1} +dropouts = {0.3} # img_enc_name = modeling.backbones.model_map.keys() -img_enc_name = {'convnext_lg', 'convnext_tiny'} +img_enc_name = {'convnext_lg', 'convnext_small', 'convnext_tiny'} +# positional encoding configuration best performed as of v6.0 +pos_length = {6000000} pos_abs_th_front = {5} pos_abs_th_end = {10} pos_vec_coeff = {0, 0.5} # when 0, positional encoding is not enabled + +# to see effect of training data size block_guids_train = [ - ["cpb-aacip-254-75r7szdz"], # always block this the most "uninteresting" video (88/882 frames annotated) + + # aapb_collaboration_27_a + aapb_collaboration_27_b + aapb_collaboration_27_c + aapb_collaboration_27_e, # no training data + ## 20 + 21 + 20 + 60 = 121 videos (excluding `d` batch) with 1 uninsteresting video and 40 videos in `pbd` subset in `e` batch + # unintersting_guids + aapb_collaboration_27_b + aapb_collaboration_27_c + aapb_collaboration_27_e, # only the first "dense" annotations (shown as 0101@xxx in the bar plotting from see_results.py ) + # unintersting_guids + aapb_collaboration_27_c + aapb_collaboration_27_e, # adding "sparse" annotations (shown as 0061@xxx) + # unintersting_guids + aapb_collaboration_27_e, # adding the second "dense" annotations (shown as 0081@xxx) + unintersting_guids, # adding the "challenging" images, this is the "full" size (shown as 0001@xxx, but really using 80 guids from `a` + `b` + `c` + `bm`) + # note that the "uninstresting" video is never used in all training sets ] +# since we now do validation on a fixed set, this parameter has no effect, keeping it for historical reasons block_guids_valid = [ - [ # block all loosely-annotated videos - "cpb-aacip-254-75r7szdz", - "cpb-aacip-259-4j09zf95", - "cpb-aacip-526-hd7np1xn78", - 
"cpb-aacip-75-72b8h82x", - "cpb-aacip-fe9efa663c6", - "cpb-aacip-f5847a01db5", - "cpb-aacip-f2a88c88d9d", - "cpb-aacip-ec590a6761d", - "cpb-aacip-c7c64922fcd", - "cpb-aacip-f3fa7215348", - "cpb-aacip-f13ae523e20", - "cpb-aacip-e7a25f07d35", - "cpb-aacip-ce6d5e4bd7f", - "cpb-aacip-690722078b2", - "cpb-aacip-e649135e6ec", - "cpb-aacip-15-93gxdjk6", - "cpb-aacip-512-4f1mg7h078", - "cpb-aacip-512-4m9183583s", - "cpb-aacip-512-4b2x34nt7g", - "cpb-aacip-512-3n20c4tr34", - "cpb-aacip-512-3f4kk9534t", - ] - # {"cpb-aacip-254-75r7szdz"}, # effectively no block except + aapb_collaboration_27_b + aapb_collaboration_27_e, # block all loosely-annotated videos and the challenging images + # unintersting_guids, # effectively no block except +] + +# "prebin" configurations. +# NOTE that postbin is not a part of the CV model, so is not handled here +# for single binning configuration, just use the binning dict +# for multiple binning configurations (for experimental reasons), use the binning scheme names (str) +prebin = ['noprebin'] +# prebin = [] + +clss_param_keys = ['split_size', 'num_epochs', 'num_layers', 'pos_length', 'pos_unit', 'dropouts', 'img_enc_name', + 'pos_abs_th_front', 'pos_abs_th_end', 'pos_vec_coeff', + 'block_guids_train', 'block_guids_valid', + 'prebin'] + +## TF stitching grid search (for future) +tfMinTPScores = set() +tfMinTFScores = set() +tfLabelMapFns = set() +tfMinNegTFDurations = set() +tfMinTFDurations = set() +tfAllowOverlaps = set() + +stit_param_keys = [ + "tfMinTPScores", "tfMinTFScores", "tfMinTFDurations", + # "tfAllowOverlaps" # we don't have a proper evaluator for overlapping TFs ] -# we no longer use bins, keeping this just for historical reference -# bins = [{'pre': {'slate': ['S'], 'chyron': ['I', 'N', 'Y'], 'credit': ['C']}}] -param_keys = ['num_splits', 'num_epochs', 'num_layers', 'pos_length', 'pos_unit', 'dropouts', 'img_enc_name', 'pos_abs_th_front', 'pos_abs_th_end', 'pos_vec_coeff', 'block_guids_train', 'block_guids_valid'] l = locals() -configs = [] -for vals in itertools.product(*[l[key] for key in param_keys]): - configs.append(dict(zip(param_keys, vals))) + + +def get_classifier_training_grids(): + for vals in itertools.product(*[l[key] for key in clss_param_keys]): + yield dict(zip(clss_param_keys, vals)) diff --git a/modeling/models/20240724-075329.convnext_lg.posF.pt b/modeling/models/20240724-075329.convnext_lg.posF.pt deleted file mode 100644 index 967f523..0000000 Binary files a/modeling/models/20240724-075329.convnext_lg.posF.pt and /dev/null differ diff --git a/modeling/models/20240724-075329.convnext_lg.posF.yml b/modeling/models/20240724-075329.convnext_lg.posF.yml deleted file mode 100644 index 64e3ec4..0000000 --- a/modeling/models/20240724-075329.convnext_lg.posF.yml +++ /dev/null @@ -1,34 +0,0 @@ -num_splits: 1 -num_epochs: 10 -num_layers: 2 -pos_length: 6000000 -pos_unit: 60000 -dropouts: 0.1 -img_enc_name: convnext_lg -pos_abs_th_front: 5 -pos_abs_th_end: 10 -pos_vec_coeff: 0 -block_guids_train: -- cpb-aacip-254-75r7szdz -block_guids_valid: -- cpb-aacip-254-75r7szdz -- cpb-aacip-259-4j09zf95 -- cpb-aacip-526-hd7np1xn78 -- cpb-aacip-75-72b8h82x -- cpb-aacip-fe9efa663c6 -- cpb-aacip-f5847a01db5 -- cpb-aacip-f2a88c88d9d -- cpb-aacip-ec590a6761d -- cpb-aacip-c7c64922fcd -- cpb-aacip-f3fa7215348 -- cpb-aacip-f13ae523e20 -- cpb-aacip-e7a25f07d35 -- cpb-aacip-ce6d5e4bd7f -- cpb-aacip-690722078b2 -- cpb-aacip-e649135e6ec -- cpb-aacip-15-93gxdjk6 -- cpb-aacip-512-4f1mg7h078 -- cpb-aacip-512-4m9183583s -- cpb-aacip-512-4b2x34nt7g -- 
cpb-aacip-512-3n20c4tr34 -- cpb-aacip-512-3f4kk9534t diff --git a/modeling/models/20240724-075349.convnext_lg.posT.pt b/modeling/models/20240724-075349.convnext_lg.posT.pt deleted file mode 100644 index 446e29b..0000000 Binary files a/modeling/models/20240724-075349.convnext_lg.posT.pt and /dev/null differ diff --git a/modeling/models/20240724-075349.convnext_lg.posT.yml b/modeling/models/20240724-075349.convnext_lg.posT.yml deleted file mode 100644 index b78310a..0000000 --- a/modeling/models/20240724-075349.convnext_lg.posT.yml +++ /dev/null @@ -1,34 +0,0 @@ -num_splits: 1 -num_epochs: 10 -num_layers: 2 -pos_length: 6000000 -pos_unit: 60000 -dropouts: 0.1 -img_enc_name: convnext_lg -pos_abs_th_front: 5 -pos_abs_th_end: 10 -pos_vec_coeff: 0.5 -block_guids_train: -- cpb-aacip-254-75r7szdz -block_guids_valid: -- cpb-aacip-254-75r7szdz -- cpb-aacip-259-4j09zf95 -- cpb-aacip-526-hd7np1xn78 -- cpb-aacip-75-72b8h82x -- cpb-aacip-fe9efa663c6 -- cpb-aacip-f5847a01db5 -- cpb-aacip-f2a88c88d9d -- cpb-aacip-ec590a6761d -- cpb-aacip-c7c64922fcd -- cpb-aacip-f3fa7215348 -- cpb-aacip-f13ae523e20 -- cpb-aacip-e7a25f07d35 -- cpb-aacip-ce6d5e4bd7f -- cpb-aacip-690722078b2 -- cpb-aacip-e649135e6ec -- cpb-aacip-15-93gxdjk6 -- cpb-aacip-512-4f1mg7h078 -- cpb-aacip-512-4m9183583s -- cpb-aacip-512-4b2x34nt7g -- cpb-aacip-512-3n20c4tr34 -- cpb-aacip-512-3f4kk9534t diff --git a/modeling/models/20240724-075401.convnext_tiny.posF.pt b/modeling/models/20240724-075401.convnext_tiny.posF.pt deleted file mode 100644 index db0342f..0000000 Binary files a/modeling/models/20240724-075401.convnext_tiny.posF.pt and /dev/null differ diff --git a/modeling/models/20240724-075401.convnext_tiny.posF.yml b/modeling/models/20240724-075401.convnext_tiny.posF.yml deleted file mode 100644 index 8380072..0000000 --- a/modeling/models/20240724-075401.convnext_tiny.posF.yml +++ /dev/null @@ -1,34 +0,0 @@ -num_splits: 1 -num_epochs: 10 -num_layers: 2 -pos_length: 6000000 -pos_unit: 60000 -dropouts: 0.1 -img_enc_name: convnext_tiny -pos_abs_th_front: 5 -pos_abs_th_end: 10 -pos_vec_coeff: 0 -block_guids_train: -- cpb-aacip-254-75r7szdz -block_guids_valid: -- cpb-aacip-254-75r7szdz -- cpb-aacip-259-4j09zf95 -- cpb-aacip-526-hd7np1xn78 -- cpb-aacip-75-72b8h82x -- cpb-aacip-fe9efa663c6 -- cpb-aacip-f5847a01db5 -- cpb-aacip-f2a88c88d9d -- cpb-aacip-ec590a6761d -- cpb-aacip-c7c64922fcd -- cpb-aacip-f3fa7215348 -- cpb-aacip-f13ae523e20 -- cpb-aacip-e7a25f07d35 -- cpb-aacip-ce6d5e4bd7f -- cpb-aacip-690722078b2 -- cpb-aacip-e649135e6ec -- cpb-aacip-15-93gxdjk6 -- cpb-aacip-512-4f1mg7h078 -- cpb-aacip-512-4m9183583s -- cpb-aacip-512-4b2x34nt7g -- cpb-aacip-512-3n20c4tr34 -- cpb-aacip-512-3f4kk9534t diff --git a/modeling/models/20240724-075418.convnext_tiny.posT.pt b/modeling/models/20240724-075418.convnext_tiny.posT.pt deleted file mode 100644 index 84a4511..0000000 Binary files a/modeling/models/20240724-075418.convnext_tiny.posT.pt and /dev/null differ diff --git a/modeling/models/20240724-075418.convnext_tiny.posT.yml b/modeling/models/20240724-075418.convnext_tiny.posT.yml deleted file mode 100644 index 322da27..0000000 --- a/modeling/models/20240724-075418.convnext_tiny.posT.yml +++ /dev/null @@ -1,34 +0,0 @@ -num_splits: 1 -num_epochs: 10 -num_layers: 2 -pos_length: 6000000 -pos_unit: 60000 -dropouts: 0.1 -img_enc_name: convnext_tiny -pos_abs_th_front: 5 -pos_abs_th_end: 10 -pos_vec_coeff: 0.5 -block_guids_train: -- cpb-aacip-254-75r7szdz -block_guids_valid: -- cpb-aacip-254-75r7szdz -- cpb-aacip-259-4j09zf95 -- cpb-aacip-526-hd7np1xn78 -- 
cpb-aacip-75-72b8h82x -- cpb-aacip-fe9efa663c6 -- cpb-aacip-f5847a01db5 -- cpb-aacip-f2a88c88d9d -- cpb-aacip-ec590a6761d -- cpb-aacip-c7c64922fcd -- cpb-aacip-f3fa7215348 -- cpb-aacip-f13ae523e20 -- cpb-aacip-e7a25f07d35 -- cpb-aacip-ce6d5e4bd7f -- cpb-aacip-690722078b2 -- cpb-aacip-e649135e6ec -- cpb-aacip-15-93gxdjk6 -- cpb-aacip-512-4f1mg7h078 -- cpb-aacip-512-4m9183583s -- cpb-aacip-512-4b2x34nt7g -- cpb-aacip-512-3n20c4tr34 -- cpb-aacip-512-3f4kk9534t diff --git a/modeling/models/20241125-125501.convnext_small.noprebin.posF.pt b/modeling/models/20241125-125501.convnext_small.noprebin.posF.pt new file mode 100644 index 0000000..522da7a Binary files /dev/null and b/modeling/models/20241125-125501.convnext_small.noprebin.posF.pt differ diff --git a/modeling/models/20241125-125501.convnext_small.noprebin.posF.yml b/modeling/models/20241125-125501.convnext_small.noprebin.posF.yml new file mode 100644 index 0000000..2f96cc2 --- /dev/null +++ b/modeling/models/20241125-125501.convnext_small.noprebin.posF.yml @@ -0,0 +1,95 @@ +split_size: .inf +num_epochs: 10 +num_layers: 4 +pos_length: 6000000 +pos_unit: 60000 +dropouts: 0.3 +img_enc_name: convnext_small +pos_abs_th_front: 5 +pos_abs_th_end: 10 +pos_vec_coeff: 0 +block_guids_train: +- cpb-aacip-254-75r7szdz +block_guids_valid: +- cpb-aacip-254-75r7szdz +- cpb-aacip-259-4j09zf95 +- cpb-aacip-526-hd7np1xn78 +- cpb-aacip-75-72b8h82x +- cpb-aacip-fe9efa663c6 +- cpb-aacip-f5847a01db5 +- cpb-aacip-f2a88c88d9d +- cpb-aacip-ec590a6761d +- cpb-aacip-c7c64922fcd +- cpb-aacip-f3fa7215348 +- cpb-aacip-f13ae523e20 +- cpb-aacip-e7a25f07d35 +- cpb-aacip-ce6d5e4bd7f +- cpb-aacip-690722078b2 +- cpb-aacip-e649135e6ec +- cpb-aacip-15-93gxdjk6 +- cpb-aacip-512-4f1mg7h078 +- cpb-aacip-512-4m9183583s +- cpb-aacip-512-4b2x34nt7g +- cpb-aacip-512-3n20c4tr34 +- cpb-aacip-512-3f4kk9534t +- cpb-aacip-00a9ed7f2ba +- cpb-aacip-0ace30f582d +- cpb-aacip-0ae98c2c4b2 +- cpb-aacip-0b0c0afdb11 +- cpb-aacip-0bb992d2e7f +- cpb-aacip-0c0374c6c55 +- cpb-aacip-0c727d4cac3 +- cpb-aacip-0c74795718b +- cpb-aacip-0cb2aebaeba +- cpb-aacip-0d74af419eb +- cpb-aacip-0dbb0610457 +- cpb-aacip-0dfbaaec869 +- cpb-aacip-0e2dc840bc6 +- cpb-aacip-0ed7e315160 +- cpb-aacip-0f3879e2f22 +- cpb-aacip-0f80359ada5 +- cpb-aacip-0f80a4f5ed2 +- cpb-aacip-0fe3e4311e1 +- cpb-aacip-1a365705273 +- cpb-aacip-1b295839145 +- cpb-aacip-110-16c2ftdq +- cpb-aacip-120-1615dwkg +- cpb-aacip-120-203xsm67 +- cpb-aacip-15-70msck27 +- cpb-aacip-16-19s1rw84 +- cpb-aacip-17-07tmq941 +- cpb-aacip-17-58bg87rx +- cpb-aacip-17-65v6xv27 +- cpb-aacip-17-81jhbz0g +- cpb-aacip-29-61djhjcx +- cpb-aacip-29-8380gksn +- cpb-aacip-41-322bvxmn +- cpb-aacip-41-42n5tj3d +- cpb-aacip-110-35gb5r94 +- cpb-aacip-111-655dvd99 +- cpb-aacip-120-19s1rrsp +- cpb-aacip-120-31qfv097 +- cpb-aacip-120-73pvmn2q +- cpb-aacip-120-80ht7h8d +- cpb-aacip-120-8279d01c +- cpb-aacip-120-83xsjcb2 +- cpb-aacip-17-88qc0md1 +- cpb-aacip-35-36tx99h9 +- cpb-aacip-42-78tb31b1 +- cpb-aacip-52-84zgn1wb +- cpb-aacip-52-87pnw5t0 +- cpb-aacip-55-84mkmvwx +- cpb-aacip-75-13905w9q +- cpb-aacip-75-54xgxnzg +- cpb-aacip-77-02q5807j +- cpb-aacip-77-074tnfhr +- cpb-aacip-77-1937qsxt +- cpb-aacip-77-214mx491 +- cpb-aacip-77-24jm6zc8 +- cpb-aacip-77-35t77b2v +- cpb-aacip-77-44bp0mdh +- cpb-aacip-77-49t1h3fv +- cpb-aacip-77-81jhbv89 +- cpb-aacip-83-074tmx7h +- cpb-aacip-83-23612txx +prebin: {} diff --git a/modeling/models/20241125-125550.convnext_small.noprebin.posT.pt b/modeling/models/20241125-125550.convnext_small.noprebin.posT.pt new file mode 100644 index 0000000..2eab7f8 
Binary files /dev/null and b/modeling/models/20241125-125550.convnext_small.noprebin.posT.pt differ diff --git a/modeling/models/20241125-125550.convnext_small.noprebin.posT.yml b/modeling/models/20241125-125550.convnext_small.noprebin.posT.yml new file mode 100644 index 0000000..7e0c3e0 --- /dev/null +++ b/modeling/models/20241125-125550.convnext_small.noprebin.posT.yml @@ -0,0 +1,95 @@ +split_size: .inf +num_epochs: 10 +num_layers: 4 +pos_length: 6000000 +pos_unit: 60000 +dropouts: 0.3 +img_enc_name: convnext_small +pos_abs_th_front: 5 +pos_abs_th_end: 10 +pos_vec_coeff: 0.5 +block_guids_train: +- cpb-aacip-254-75r7szdz +block_guids_valid: +- cpb-aacip-254-75r7szdz +- cpb-aacip-259-4j09zf95 +- cpb-aacip-526-hd7np1xn78 +- cpb-aacip-75-72b8h82x +- cpb-aacip-fe9efa663c6 +- cpb-aacip-f5847a01db5 +- cpb-aacip-f2a88c88d9d +- cpb-aacip-ec590a6761d +- cpb-aacip-c7c64922fcd +- cpb-aacip-f3fa7215348 +- cpb-aacip-f13ae523e20 +- cpb-aacip-e7a25f07d35 +- cpb-aacip-ce6d5e4bd7f +- cpb-aacip-690722078b2 +- cpb-aacip-e649135e6ec +- cpb-aacip-15-93gxdjk6 +- cpb-aacip-512-4f1mg7h078 +- cpb-aacip-512-4m9183583s +- cpb-aacip-512-4b2x34nt7g +- cpb-aacip-512-3n20c4tr34 +- cpb-aacip-512-3f4kk9534t +- cpb-aacip-00a9ed7f2ba +- cpb-aacip-0ace30f582d +- cpb-aacip-0ae98c2c4b2 +- cpb-aacip-0b0c0afdb11 +- cpb-aacip-0bb992d2e7f +- cpb-aacip-0c0374c6c55 +- cpb-aacip-0c727d4cac3 +- cpb-aacip-0c74795718b +- cpb-aacip-0cb2aebaeba +- cpb-aacip-0d74af419eb +- cpb-aacip-0dbb0610457 +- cpb-aacip-0dfbaaec869 +- cpb-aacip-0e2dc840bc6 +- cpb-aacip-0ed7e315160 +- cpb-aacip-0f3879e2f22 +- cpb-aacip-0f80359ada5 +- cpb-aacip-0f80a4f5ed2 +- cpb-aacip-0fe3e4311e1 +- cpb-aacip-1a365705273 +- cpb-aacip-1b295839145 +- cpb-aacip-110-16c2ftdq +- cpb-aacip-120-1615dwkg +- cpb-aacip-120-203xsm67 +- cpb-aacip-15-70msck27 +- cpb-aacip-16-19s1rw84 +- cpb-aacip-17-07tmq941 +- cpb-aacip-17-58bg87rx +- cpb-aacip-17-65v6xv27 +- cpb-aacip-17-81jhbz0g +- cpb-aacip-29-61djhjcx +- cpb-aacip-29-8380gksn +- cpb-aacip-41-322bvxmn +- cpb-aacip-41-42n5tj3d +- cpb-aacip-110-35gb5r94 +- cpb-aacip-111-655dvd99 +- cpb-aacip-120-19s1rrsp +- cpb-aacip-120-31qfv097 +- cpb-aacip-120-73pvmn2q +- cpb-aacip-120-80ht7h8d +- cpb-aacip-120-8279d01c +- cpb-aacip-120-83xsjcb2 +- cpb-aacip-17-88qc0md1 +- cpb-aacip-35-36tx99h9 +- cpb-aacip-42-78tb31b1 +- cpb-aacip-52-84zgn1wb +- cpb-aacip-52-87pnw5t0 +- cpb-aacip-55-84mkmvwx +- cpb-aacip-75-13905w9q +- cpb-aacip-75-54xgxnzg +- cpb-aacip-77-02q5807j +- cpb-aacip-77-074tnfhr +- cpb-aacip-77-1937qsxt +- cpb-aacip-77-214mx491 +- cpb-aacip-77-24jm6zc8 +- cpb-aacip-77-35t77b2v +- cpb-aacip-77-44bp0mdh +- cpb-aacip-77-49t1h3fv +- cpb-aacip-77-81jhbv89 +- cpb-aacip-83-074tmx7h +- cpb-aacip-83-23612txx +prebin: {} diff --git a/modeling/models/20241125-125627.convnext_tiny.noprebin.posF.pt b/modeling/models/20241125-125627.convnext_tiny.noprebin.posF.pt new file mode 100644 index 0000000..ce503d2 Binary files /dev/null and b/modeling/models/20241125-125627.convnext_tiny.noprebin.posF.pt differ diff --git a/modeling/models/20241125-125627.convnext_tiny.noprebin.posF.yml b/modeling/models/20241125-125627.convnext_tiny.noprebin.posF.yml new file mode 100644 index 0000000..1b84a3b --- /dev/null +++ b/modeling/models/20241125-125627.convnext_tiny.noprebin.posF.yml @@ -0,0 +1,95 @@ +split_size: .inf +num_epochs: 10 +num_layers: 4 +pos_length: 6000000 +pos_unit: 60000 +dropouts: 0.3 +img_enc_name: convnext_tiny +pos_abs_th_front: 5 +pos_abs_th_end: 10 +pos_vec_coeff: 0 +block_guids_train: +- cpb-aacip-254-75r7szdz +block_guids_valid: +- 
cpb-aacip-254-75r7szdz +- cpb-aacip-259-4j09zf95 +- cpb-aacip-526-hd7np1xn78 +- cpb-aacip-75-72b8h82x +- cpb-aacip-fe9efa663c6 +- cpb-aacip-f5847a01db5 +- cpb-aacip-f2a88c88d9d +- cpb-aacip-ec590a6761d +- cpb-aacip-c7c64922fcd +- cpb-aacip-f3fa7215348 +- cpb-aacip-f13ae523e20 +- cpb-aacip-e7a25f07d35 +- cpb-aacip-ce6d5e4bd7f +- cpb-aacip-690722078b2 +- cpb-aacip-e649135e6ec +- cpb-aacip-15-93gxdjk6 +- cpb-aacip-512-4f1mg7h078 +- cpb-aacip-512-4m9183583s +- cpb-aacip-512-4b2x34nt7g +- cpb-aacip-512-3n20c4tr34 +- cpb-aacip-512-3f4kk9534t +- cpb-aacip-00a9ed7f2ba +- cpb-aacip-0ace30f582d +- cpb-aacip-0ae98c2c4b2 +- cpb-aacip-0b0c0afdb11 +- cpb-aacip-0bb992d2e7f +- cpb-aacip-0c0374c6c55 +- cpb-aacip-0c727d4cac3 +- cpb-aacip-0c74795718b +- cpb-aacip-0cb2aebaeba +- cpb-aacip-0d74af419eb +- cpb-aacip-0dbb0610457 +- cpb-aacip-0dfbaaec869 +- cpb-aacip-0e2dc840bc6 +- cpb-aacip-0ed7e315160 +- cpb-aacip-0f3879e2f22 +- cpb-aacip-0f80359ada5 +- cpb-aacip-0f80a4f5ed2 +- cpb-aacip-0fe3e4311e1 +- cpb-aacip-1a365705273 +- cpb-aacip-1b295839145 +- cpb-aacip-110-16c2ftdq +- cpb-aacip-120-1615dwkg +- cpb-aacip-120-203xsm67 +- cpb-aacip-15-70msck27 +- cpb-aacip-16-19s1rw84 +- cpb-aacip-17-07tmq941 +- cpb-aacip-17-58bg87rx +- cpb-aacip-17-65v6xv27 +- cpb-aacip-17-81jhbz0g +- cpb-aacip-29-61djhjcx +- cpb-aacip-29-8380gksn +- cpb-aacip-41-322bvxmn +- cpb-aacip-41-42n5tj3d +- cpb-aacip-110-35gb5r94 +- cpb-aacip-111-655dvd99 +- cpb-aacip-120-19s1rrsp +- cpb-aacip-120-31qfv097 +- cpb-aacip-120-73pvmn2q +- cpb-aacip-120-80ht7h8d +- cpb-aacip-120-8279d01c +- cpb-aacip-120-83xsjcb2 +- cpb-aacip-17-88qc0md1 +- cpb-aacip-35-36tx99h9 +- cpb-aacip-42-78tb31b1 +- cpb-aacip-52-84zgn1wb +- cpb-aacip-52-87pnw5t0 +- cpb-aacip-55-84mkmvwx +- cpb-aacip-75-13905w9q +- cpb-aacip-75-54xgxnzg +- cpb-aacip-77-02q5807j +- cpb-aacip-77-074tnfhr +- cpb-aacip-77-1937qsxt +- cpb-aacip-77-214mx491 +- cpb-aacip-77-24jm6zc8 +- cpb-aacip-77-35t77b2v +- cpb-aacip-77-44bp0mdh +- cpb-aacip-77-49t1h3fv +- cpb-aacip-77-81jhbv89 +- cpb-aacip-83-074tmx7h +- cpb-aacip-83-23612txx +prebin: {} diff --git a/modeling/models/20241125-125705.convnext_tiny.noprebin.posT.pt b/modeling/models/20241125-125705.convnext_tiny.noprebin.posT.pt new file mode 100644 index 0000000..1d0600b Binary files /dev/null and b/modeling/models/20241125-125705.convnext_tiny.noprebin.posT.pt differ diff --git a/modeling/models/20241125-125705.convnext_tiny.noprebin.posT.yml b/modeling/models/20241125-125705.convnext_tiny.noprebin.posT.yml new file mode 100644 index 0000000..e7dd0df --- /dev/null +++ b/modeling/models/20241125-125705.convnext_tiny.noprebin.posT.yml @@ -0,0 +1,95 @@ +split_size: .inf +num_epochs: 10 +num_layers: 4 +pos_length: 6000000 +pos_unit: 60000 +dropouts: 0.3 +img_enc_name: convnext_tiny +pos_abs_th_front: 5 +pos_abs_th_end: 10 +pos_vec_coeff: 0.5 +block_guids_train: +- cpb-aacip-254-75r7szdz +block_guids_valid: +- cpb-aacip-254-75r7szdz +- cpb-aacip-259-4j09zf95 +- cpb-aacip-526-hd7np1xn78 +- cpb-aacip-75-72b8h82x +- cpb-aacip-fe9efa663c6 +- cpb-aacip-f5847a01db5 +- cpb-aacip-f2a88c88d9d +- cpb-aacip-ec590a6761d +- cpb-aacip-c7c64922fcd +- cpb-aacip-f3fa7215348 +- cpb-aacip-f13ae523e20 +- cpb-aacip-e7a25f07d35 +- cpb-aacip-ce6d5e4bd7f +- cpb-aacip-690722078b2 +- cpb-aacip-e649135e6ec +- cpb-aacip-15-93gxdjk6 +- cpb-aacip-512-4f1mg7h078 +- cpb-aacip-512-4m9183583s +- cpb-aacip-512-4b2x34nt7g +- cpb-aacip-512-3n20c4tr34 +- cpb-aacip-512-3f4kk9534t +- cpb-aacip-00a9ed7f2ba +- cpb-aacip-0ace30f582d +- cpb-aacip-0ae98c2c4b2 +- cpb-aacip-0b0c0afdb11 +- 
cpb-aacip-0bb992d2e7f +- cpb-aacip-0c0374c6c55 +- cpb-aacip-0c727d4cac3 +- cpb-aacip-0c74795718b +- cpb-aacip-0cb2aebaeba +- cpb-aacip-0d74af419eb +- cpb-aacip-0dbb0610457 +- cpb-aacip-0dfbaaec869 +- cpb-aacip-0e2dc840bc6 +- cpb-aacip-0ed7e315160 +- cpb-aacip-0f3879e2f22 +- cpb-aacip-0f80359ada5 +- cpb-aacip-0f80a4f5ed2 +- cpb-aacip-0fe3e4311e1 +- cpb-aacip-1a365705273 +- cpb-aacip-1b295839145 +- cpb-aacip-110-16c2ftdq +- cpb-aacip-120-1615dwkg +- cpb-aacip-120-203xsm67 +- cpb-aacip-15-70msck27 +- cpb-aacip-16-19s1rw84 +- cpb-aacip-17-07tmq941 +- cpb-aacip-17-58bg87rx +- cpb-aacip-17-65v6xv27 +- cpb-aacip-17-81jhbz0g +- cpb-aacip-29-61djhjcx +- cpb-aacip-29-8380gksn +- cpb-aacip-41-322bvxmn +- cpb-aacip-41-42n5tj3d +- cpb-aacip-110-35gb5r94 +- cpb-aacip-111-655dvd99 +- cpb-aacip-120-19s1rrsp +- cpb-aacip-120-31qfv097 +- cpb-aacip-120-73pvmn2q +- cpb-aacip-120-80ht7h8d +- cpb-aacip-120-8279d01c +- cpb-aacip-120-83xsjcb2 +- cpb-aacip-17-88qc0md1 +- cpb-aacip-35-36tx99h9 +- cpb-aacip-42-78tb31b1 +- cpb-aacip-52-84zgn1wb +- cpb-aacip-52-87pnw5t0 +- cpb-aacip-55-84mkmvwx +- cpb-aacip-75-13905w9q +- cpb-aacip-75-54xgxnzg +- cpb-aacip-77-02q5807j +- cpb-aacip-77-074tnfhr +- cpb-aacip-77-1937qsxt +- cpb-aacip-77-214mx491 +- cpb-aacip-77-24jm6zc8 +- cpb-aacip-77-35t77b2v +- cpb-aacip-77-44bp0mdh +- cpb-aacip-77-49t1h3fv +- cpb-aacip-77-81jhbv89 +- cpb-aacip-83-074tmx7h +- cpb-aacip-83-23612txx +prebin: {} diff --git a/modeling/models/20241125-125741.convnext_lg.noprebin.posF.pt b/modeling/models/20241125-125741.convnext_lg.noprebin.posF.pt new file mode 100644 index 0000000..c6fc713 Binary files /dev/null and b/modeling/models/20241125-125741.convnext_lg.noprebin.posF.pt differ diff --git a/modeling/models/20241125-125741.convnext_lg.noprebin.posF.yml b/modeling/models/20241125-125741.convnext_lg.noprebin.posF.yml new file mode 100644 index 0000000..12d9685 --- /dev/null +++ b/modeling/models/20241125-125741.convnext_lg.noprebin.posF.yml @@ -0,0 +1,95 @@ +split_size: .inf +num_epochs: 10 +num_layers: 4 +pos_length: 6000000 +pos_unit: 60000 +dropouts: 0.3 +img_enc_name: convnext_lg +pos_abs_th_front: 5 +pos_abs_th_end: 10 +pos_vec_coeff: 0 +block_guids_train: +- cpb-aacip-254-75r7szdz +block_guids_valid: +- cpb-aacip-254-75r7szdz +- cpb-aacip-259-4j09zf95 +- cpb-aacip-526-hd7np1xn78 +- cpb-aacip-75-72b8h82x +- cpb-aacip-fe9efa663c6 +- cpb-aacip-f5847a01db5 +- cpb-aacip-f2a88c88d9d +- cpb-aacip-ec590a6761d +- cpb-aacip-c7c64922fcd +- cpb-aacip-f3fa7215348 +- cpb-aacip-f13ae523e20 +- cpb-aacip-e7a25f07d35 +- cpb-aacip-ce6d5e4bd7f +- cpb-aacip-690722078b2 +- cpb-aacip-e649135e6ec +- cpb-aacip-15-93gxdjk6 +- cpb-aacip-512-4f1mg7h078 +- cpb-aacip-512-4m9183583s +- cpb-aacip-512-4b2x34nt7g +- cpb-aacip-512-3n20c4tr34 +- cpb-aacip-512-3f4kk9534t +- cpb-aacip-00a9ed7f2ba +- cpb-aacip-0ace30f582d +- cpb-aacip-0ae98c2c4b2 +- cpb-aacip-0b0c0afdb11 +- cpb-aacip-0bb992d2e7f +- cpb-aacip-0c0374c6c55 +- cpb-aacip-0c727d4cac3 +- cpb-aacip-0c74795718b +- cpb-aacip-0cb2aebaeba +- cpb-aacip-0d74af419eb +- cpb-aacip-0dbb0610457 +- cpb-aacip-0dfbaaec869 +- cpb-aacip-0e2dc840bc6 +- cpb-aacip-0ed7e315160 +- cpb-aacip-0f3879e2f22 +- cpb-aacip-0f80359ada5 +- cpb-aacip-0f80a4f5ed2 +- cpb-aacip-0fe3e4311e1 +- cpb-aacip-1a365705273 +- cpb-aacip-1b295839145 +- cpb-aacip-110-16c2ftdq +- cpb-aacip-120-1615dwkg +- cpb-aacip-120-203xsm67 +- cpb-aacip-15-70msck27 +- cpb-aacip-16-19s1rw84 +- cpb-aacip-17-07tmq941 +- cpb-aacip-17-58bg87rx +- cpb-aacip-17-65v6xv27 +- cpb-aacip-17-81jhbz0g +- cpb-aacip-29-61djhjcx +- 
cpb-aacip-29-8380gksn +- cpb-aacip-41-322bvxmn +- cpb-aacip-41-42n5tj3d +- cpb-aacip-110-35gb5r94 +- cpb-aacip-111-655dvd99 +- cpb-aacip-120-19s1rrsp +- cpb-aacip-120-31qfv097 +- cpb-aacip-120-73pvmn2q +- cpb-aacip-120-80ht7h8d +- cpb-aacip-120-8279d01c +- cpb-aacip-120-83xsjcb2 +- cpb-aacip-17-88qc0md1 +- cpb-aacip-35-36tx99h9 +- cpb-aacip-42-78tb31b1 +- cpb-aacip-52-84zgn1wb +- cpb-aacip-52-87pnw5t0 +- cpb-aacip-55-84mkmvwx +- cpb-aacip-75-13905w9q +- cpb-aacip-75-54xgxnzg +- cpb-aacip-77-02q5807j +- cpb-aacip-77-074tnfhr +- cpb-aacip-77-1937qsxt +- cpb-aacip-77-214mx491 +- cpb-aacip-77-24jm6zc8 +- cpb-aacip-77-35t77b2v +- cpb-aacip-77-44bp0mdh +- cpb-aacip-77-49t1h3fv +- cpb-aacip-77-81jhbv89 +- cpb-aacip-83-074tmx7h +- cpb-aacip-83-23612txx +prebin: {} diff --git a/modeling/models/20241125-125904.convnext_lg.noprebin.posT.pt b/modeling/models/20241125-125904.convnext_lg.noprebin.posT.pt new file mode 100644 index 0000000..395bc1d Binary files /dev/null and b/modeling/models/20241125-125904.convnext_lg.noprebin.posT.pt differ diff --git a/modeling/models/20241125-125904.convnext_lg.noprebin.posT.yml b/modeling/models/20241125-125904.convnext_lg.noprebin.posT.yml new file mode 100644 index 0000000..315367c --- /dev/null +++ b/modeling/models/20241125-125904.convnext_lg.noprebin.posT.yml @@ -0,0 +1,95 @@ +split_size: .inf +num_epochs: 10 +num_layers: 4 +pos_length: 6000000 +pos_unit: 60000 +dropouts: 0.3 +img_enc_name: convnext_lg +pos_abs_th_front: 5 +pos_abs_th_end: 10 +pos_vec_coeff: 0.5 +block_guids_train: +- cpb-aacip-254-75r7szdz +block_guids_valid: +- cpb-aacip-254-75r7szdz +- cpb-aacip-259-4j09zf95 +- cpb-aacip-526-hd7np1xn78 +- cpb-aacip-75-72b8h82x +- cpb-aacip-fe9efa663c6 +- cpb-aacip-f5847a01db5 +- cpb-aacip-f2a88c88d9d +- cpb-aacip-ec590a6761d +- cpb-aacip-c7c64922fcd +- cpb-aacip-f3fa7215348 +- cpb-aacip-f13ae523e20 +- cpb-aacip-e7a25f07d35 +- cpb-aacip-ce6d5e4bd7f +- cpb-aacip-690722078b2 +- cpb-aacip-e649135e6ec +- cpb-aacip-15-93gxdjk6 +- cpb-aacip-512-4f1mg7h078 +- cpb-aacip-512-4m9183583s +- cpb-aacip-512-4b2x34nt7g +- cpb-aacip-512-3n20c4tr34 +- cpb-aacip-512-3f4kk9534t +- cpb-aacip-00a9ed7f2ba +- cpb-aacip-0ace30f582d +- cpb-aacip-0ae98c2c4b2 +- cpb-aacip-0b0c0afdb11 +- cpb-aacip-0bb992d2e7f +- cpb-aacip-0c0374c6c55 +- cpb-aacip-0c727d4cac3 +- cpb-aacip-0c74795718b +- cpb-aacip-0cb2aebaeba +- cpb-aacip-0d74af419eb +- cpb-aacip-0dbb0610457 +- cpb-aacip-0dfbaaec869 +- cpb-aacip-0e2dc840bc6 +- cpb-aacip-0ed7e315160 +- cpb-aacip-0f3879e2f22 +- cpb-aacip-0f80359ada5 +- cpb-aacip-0f80a4f5ed2 +- cpb-aacip-0fe3e4311e1 +- cpb-aacip-1a365705273 +- cpb-aacip-1b295839145 +- cpb-aacip-110-16c2ftdq +- cpb-aacip-120-1615dwkg +- cpb-aacip-120-203xsm67 +- cpb-aacip-15-70msck27 +- cpb-aacip-16-19s1rw84 +- cpb-aacip-17-07tmq941 +- cpb-aacip-17-58bg87rx +- cpb-aacip-17-65v6xv27 +- cpb-aacip-17-81jhbz0g +- cpb-aacip-29-61djhjcx +- cpb-aacip-29-8380gksn +- cpb-aacip-41-322bvxmn +- cpb-aacip-41-42n5tj3d +- cpb-aacip-110-35gb5r94 +- cpb-aacip-111-655dvd99 +- cpb-aacip-120-19s1rrsp +- cpb-aacip-120-31qfv097 +- cpb-aacip-120-73pvmn2q +- cpb-aacip-120-80ht7h8d +- cpb-aacip-120-8279d01c +- cpb-aacip-120-83xsjcb2 +- cpb-aacip-17-88qc0md1 +- cpb-aacip-35-36tx99h9 +- cpb-aacip-42-78tb31b1 +- cpb-aacip-52-84zgn1wb +- cpb-aacip-52-87pnw5t0 +- cpb-aacip-55-84mkmvwx +- cpb-aacip-75-13905w9q +- cpb-aacip-75-54xgxnzg +- cpb-aacip-77-02q5807j +- cpb-aacip-77-074tnfhr +- cpb-aacip-77-1937qsxt +- cpb-aacip-77-214mx491 +- cpb-aacip-77-24jm6zc8 +- cpb-aacip-77-35t77b2v +- cpb-aacip-77-44bp0mdh +- 
cpb-aacip-77-49t1h3fv +- cpb-aacip-77-81jhbv89 +- cpb-aacip-83-074tmx7h +- cpb-aacip-83-23612txx +prebin: {} diff --git a/modeling/train.py b/modeling/train.py index 594b93f..4109ba5 100644 --- a/modeling/train.py +++ b/modeling/train.py @@ -7,18 +7,21 @@ import shutil import time from pathlib import Path -from typing import Union +from typing import Union, List import numpy as np import torch import torch.nn as nn import yaml +from torch import Tensor from torch.utils.data import Dataset, DataLoader from tqdm import tqdm import modeling -from modeling import data_loader, FRAME_TYPES -from modeling.evaluate import evaluate +import modeling.config.batches +from modeling import data_loader, gridsearch, FRAME_TYPES +from modeling.config import bins +from modeling.validate import validate logging.basicConfig( level=logging.WARNING, @@ -31,7 +34,7 @@ class SWTDataset(Dataset): - def __init__(self, backbone_model_name, labels, vectors): + def __init__(self, backbone_model_name: str, labels: List[int], vectors: List[Tensor]): self.img_enc_name = backbone_model_name self.feat_dim = vectors[0].shape[0] if len(vectors) > 0 else None self.labels = labels @@ -56,12 +59,12 @@ def get_guids(data_dir): def pretraining_bin(label, specs): - if specs is None or "bins" not in specs: + if specs is None or "prebin" not in specs: return int_encode(label) - for i, ptbin in enumerate(specs["bins"].values()): + for i, ptbin in enumerate(specs["prebin"].values()): if label and label in ptbin: return i - return len(specs["bins"].keys()) + return len(specs["prebin"].keys()) def load_config(config): @@ -108,16 +111,12 @@ def prepare_datasets(indir, train_guids, validation_guids, configs): 1. positional encodings are applied. 2. 'gold' labels are attached to each vector. 3. split of vectors into training and validation sets (at video-level, meaning all frames from a video are either in training or validation set). - returns training dataset, validation dataset, and the number of labels (after "pre"-binning) + returns training dataset, validation dataset """ train_vectors = [] train_labels = [] valid_vectors = [] valid_labels = [] - if configs and 'bins' in configs: - pre_bin_size = len(configs['bins'].keys()) + 1 - else: - pre_bin_size = len(FRAME_TYPES) + 1 train_vimg = valid_vimg = 0 extractor = data_loader.FeatureExtractor(**config) @@ -144,66 +143,72 @@ def prepare_datasets(indir, train_guids, validation_guids, configs): logger.info(f'train: {len(train_guids)} videos, {train_vimg} images, valid: {len(validation_guids)} videos, {valid_vimg} images') train = SWTDataset(configs['img_enc_name'], train_labels, train_vectors) valid = SWTDataset(configs['img_enc_name'], valid_labels, valid_vectors) - return train, valid, pre_bin_size + return train, valid -def k_fold_train(indir, outdir, config_file, configs, train_id=time.strftime("%Y%m%d-%H%M%S")): +def train(indir, outdir, config_file, configs, train_id=time.strftime("%Y%m%d-%H%M%S")): os.makedirs(outdir, exist_ok=True) # need to implement "whitelist"? guids = get_guids(indir) configs = load_config(configs) if not isinstance(configs, dict) else configs logger.info(f'Using config: {configs}') - len_val = len(guids) // configs['num_splits'] + train_all_guids = set(guids) - set(configs['block_guids_train']) val_set_spec = [] p_scores = [] r_scores = [] f_scores = [] loss = nn.CrossEntropyLoss(reduction="none") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - # if num_splits == 1, validation is empty. single fold training. 
- if configs['num_splits'] == 1: - train_guids = set(guids) - validation_guids = set([]) - for block in configs['block_guids_train']: - train_guids.discard(block) + + # the number of labels (after "pre"-binning) + if configs and 'prebin' in configs and len(configs['prebin']) > 0: + num_labels = len(configs['prebin'].keys()) + 1 + else: + num_labels = len(FRAME_TYPES) + 1 + labelset = get_prebinned_labelset(configs) + + # if split_size > #videos, nothing to "hold-out". Hence, single fold training and validate against the "fixed" set + if configs['split_size'] >= len(train_all_guids): + valid_guids = modeling.config.batches.guids_for_fixed_validation_set + train_all_guids = train_all_guids - set(valid_guids) # prepare_datasets seems to work fine with empty validation set - train, valid, labelset_size = prepare_datasets(indir, train_guids, validation_guids, configs) - train_loader = DataLoader(train, batch_size=len(guids), shuffle=True) - export_model_file = f"{outdir}/{train_id}.pt" + train, valid = prepare_datasets(indir, train_all_guids, valid_guids, configs) + train_loader = DataLoader(train, batch_size=len(train_all_guids), shuffle=True) + valid_loader = DataLoader(valid, batch_size=len(valid), shuffle=False) + base_fname = f"{outdir}/{train_id}" + export_model_file = f"{base_fname}.pt" model = train_model( - get_net(train.feat_dim, labelset_size, configs['num_layers'], configs['dropouts']), + get_net(train.feat_dim, num_labels, configs['num_layers'], configs['dropouts']), loss, device, train_loader, configs) torch.save(model.state_dict(), export_model_file) - p_config = Path(f'{outdir}/{train_id}.yml') - export_kfold_config(config_file, configs, p_config) + p_config = Path(f'{base_fname}.yml') + validate(model, valid_loader, labelset, export_fname=f'{base_fname}.csv') + export_train_config(config_file, configs, p_config) return - # otherwise, do k-fold training, where k = 'num_splits' - for i in range(0, configs['num_splits']): - validation_guids = set(guids[i*len_val:(i+1)*len_val]) - train_guids = set(guids) - validation_guids - for block in configs['block_guids_valid']: - validation_guids.discard(block) - for block in configs['block_guids_train']: - train_guids.discard(block) + # otherwise, do k-fold training with k's size = split_size + valid_all_guids = sorted(list(train_all_guids - set(configs['block_guids_valid']))) + for j, i in enumerate(range(0, len(valid_all_guids), configs['split_size'])): + validation_guids = set(valid_all_guids[i:i + configs['split_size']]) + train_guids = train_all_guids - validation_guids logger.debug(f'After applied block lists:') logger.debug(f'train set: {train_guids}') logger.debug(f'dev set: {validation_guids}') - train, valid, labelset_size = prepare_datasets(indir, train_guids, validation_guids, configs) + train, valid = prepare_datasets(indir, train_guids, validation_guids, configs) # `train` and `valid` vectors DO contain positional encoding after `split_dataset` if not train.has_data() or not valid.has_data(): - logger.info(f"Skipping fold {i} due to lack of data") + logger.info(f"Skipping fold {j} due to lack of data") continue train_loader = DataLoader(train, batch_size=40, shuffle=True) valid_loader = DataLoader(valid, batch_size=len(valid), shuffle=True) - logger.info(f'Split {i}: training on {len(train_guids)} videos, validating on {validation_guids}') - export_csv_file = f"{outdir}/{train_id}.kfold_{i:03d}.csv" - export_model_file = f"{outdir}/{train_id}.kfold_{i:03d}.pt" + logger.info(f'Split {j}: training on {len(train_guids)} videos, 
validating on {validation_guids}') + export_csv_file = f"{outdir}/{train_id}.kfold_{j:03d}.csv" + export_model_file = f"{outdir}/{train_id}.kfold_{j:03d}.pt" model = train_model( - get_net(train.feat_dim, labelset_size, configs['num_layers'], configs['dropouts']), + get_net(train.feat_dim, num_labels, configs['num_layers'], configs['dropouts']), loss, device, train_loader, configs) torch.save(model.state_dict(), export_model_file) - p, r, f = evaluate(model, valid_loader, pretraining_binned_label(config), export_fname=export_csv_file) + p, r, f = validate(model, valid_loader, labelset, export_fname=export_csv_file) val_set_spec.append(validation_guids) p_scores.append(p) r_scores.append(r) @@ -211,11 +216,11 @@ def k_fold_train(indir, outdir, config_file, configs, train_id=time.strftime("%Y p_config = Path(f'{outdir}/{train_id}.kfold_config.yml') p_results = Path(f'{outdir}/{train_id}.kfold_results.txt') p_results.parent.mkdir(parents=True, exist_ok=True) - export_kfold_config(config_file, configs, p_config) + export_train_config(config_file, configs, p_config) export_kfold_results(val_set_spec, p_scores, r_scores, f_scores, p_results) -def export_kfold_config(config_file: str, configs: dict, outfile: Union[str, Path]): +def export_train_config(config_file: str, configs: dict, outfile: Union[str, Path]): if config_file is None: configs_copy = copy.deepcopy(configs) with open(outfile, 'w') as fh: @@ -244,9 +249,9 @@ def export_kfold_results(trial_specs, p_scores, r_scores, f_scores, p_results): out.write(f'\trecall = {sum(r_scores) / len(r_scores)}\n') -def pretraining_binned_label(config): - if 'bins' in config: - return list(config["bins"].keys()) + [modeling.negative_label] +def get_prebinned_labelset(config): + if 'prebin' in config and len(config['prebin']) > 0: + return list(config["prebin"].keys()) + [modeling.negative_label] return modeling.FRAME_TYPES + [modeling.negative_label] @@ -298,13 +303,26 @@ def train_model(model, loss_fn, device, train_loader, configs): configs = [load_config(args.config)] else: import modeling.gridsearch - configs = modeling.gridsearch.configs + configs = list(modeling.gridsearch.get_classifier_training_grids()) + + if not os.path.exists(args.outdir): + os.makedirs(args.outdir) print(f'training with {str(len(configs))} different configurations') - for config in configs: + for i, config in enumerate(configs): + print(f'training with config {i+1}/{len(configs)}') timestamp = time.strftime("%Y%m%d-%H%M%S") backbonename = config['img_enc_name'] + if len(config['prebin']) == 0: # empty binning = no binning + config.pop('prebin') + prebin_name = 'noprebin' + elif isinstance(config['prebin'], str): + prebin_name = config['prebin'] + config['prebin'] = bins.binning_schemes[prebin_name] + else: + # "regular" fully-custom binning config via a proper dict - can't set a name for this + prebin_name = 'custom' positionalencoding = "pos" + ("F" if config["pos_vec_coeff"] == 0 else "T") - k_fold_train( + train( indir=args.indir, outdir=args.outdir, config_file=args.config, configs=config, - train_id='.'.join([timestamp, backbonename, positionalencoding]) + train_id='.'.join(filter(None, [timestamp, backbonename, prebin_name, positionalencoding])) ) diff --git a/modeling/validate.py b/modeling/validate.py new file mode 100644 index 0000000..7363571 --- /dev/null +++ b/modeling/validate.py @@ -0,0 +1,86 @@ +import csv +import logging +import sys +from collections import defaultdict +from pathlib import Path +from typing import IO, List + +import torch +from torch import 
Tensor +from torchmetrics.functional import accuracy, precision, recall, f1_score, confusion_matrix + + +def validate(model, valid_loader, labelset, export_fname=None): + model.eval() + # valid_loader is currently expected to be a single batch + vfeats, vlabels = next(iter(valid_loader)) + outputs = model(vfeats) + _, preds = torch.max(outputs, 1) + + if not export_fname: + export_f = sys.stdout + else: + path = Path(export_fname) + path.parent.mkdir(parents=True, exist_ok=True) + export_f = open(path, 'w', encoding='utf8') + p, r, f = export_validation_results(out=export_f, preds=preds, golds=vlabels, + labelset=labelset, img_enc_name=valid_loader.dataset.img_enc_name) + logging.info(f"Exported to {export_f.name}") + return p, r, f + + +def export_validation_results(out: IO, preds: Tensor, golds: Tensor, labelset: List[str], img_enc_name: str): + """Exports the data into a human-readable format. + """ + + label_metrics = defaultdict(dict) + a_avg = accuracy(preds, golds, task='multiclass', num_classes=len(labelset), average='micro') + p_avg = precision(preds, golds, task='multiclass', num_classes=len(labelset), average='micro') + r_avg = recall(preds, golds, task='multiclass', num_classes=len(labelset), average='micro') + f_avg = f1_score(preds, golds, task='multiclass', num_classes=len(labelset), average='micro') + a = accuracy(preds, golds, task='multiclass', num_classes=len(labelset), average='none') + p = precision(preds, golds, task='multiclass', num_classes=len(labelset), average='none') + r = recall(preds, golds, task='multiclass', num_classes=len(labelset), average='none') + f = f1_score(preds, golds, task='multiclass', num_classes=len(labelset), average='none') + m = confusion_matrix(preds, golds, task='multiclass', num_classes=len(labelset)) + + for i, label in enumerate(labelset): + label_metrics[label] = {"Model_Name": img_enc_name, + "Label": label, + "Accuracy": a[i].item(), + "Precision": p[i].item(), + "Recall": r[i].item(), + "F1-Score": f[i].item()} + writer = csv.DictWriter(out, fieldnames=["Model_Name", "Label", "Accuracy", "Precision", "Recall", "F1-Score"], lineterminator='\n') + writer.writeheader() + writer.writerow({"Model_Name": img_enc_name, + "Label": "Overall", + "Accuracy": a_avg.item(), + "Precision": p_avg.item(), + "Recall": r_avg.item(), + "F1-Score": f_avg.item()}) + for label, metrics in label_metrics.items(): + writer.writerow(metrics) + out.write('\n\n') + out.write("Confusion Matrix (cols = preds, rows = golds)\n") + col_sums = torch.sum(m, dim=0) + row_sums = torch.sum(m, dim=1) + # longest_label_len with minimum 5 digits + lll = max(5, max(len(label) for label in labelset)) + out.write(f'{"":<{lll}},') + for label in labelset: + out.write(f'{label:>{lll}},') + out.write(f'{"+":>{lll}}\n') + for i, label in enumerate(labelset): + out.write(f'{label:>{lll}},') + for j in range(len(labelset)): + out.write(f'{m[i][j]:>{lll}},') + out.write(f'{row_sums[i]:>{lll}}\n') + out.write(f'{"+":>{lll}},') + for col_sum in col_sums: + out.write(f'{col_sum:>{lll}},') + if torch.sum(col_sums) == torch.sum(row_sums): + out.write(f'{torch.sum(col_sums):>{lll}}\n') + else: + out.write(f'{"!!!":>{lll}}\n') + return p, r, f diff --git a/scripts/see_results.py b/scripts/see_results.py deleted file mode 100644 index 077a444..0000000 --- a/scripts/see_results.py +++ /dev/null @@ -1,344 +0,0 @@ -import argparse -import base64 -import csv -import os -from collections import defaultdict -from io import BytesIO -from itertools import product -from statistics import mean - 
-import matplotlib.pyplot as plt -import numpy as np -import yaml - - -def get_configs_and_macroavgs(directory, target_labels=[]): - """ - 1. Iterate over all files in the directory - 2. Get configuration information - 3. Calculate the averages of accuracy, precision, recall, and f1-score for each label for each set of k_fold results. - 4. Save and return them in a dictionary format. - :param directory: where evaluation results files are stored - :param target_labels: list of labels to calculate macro average. If empty, all labels are used. - :return: 1. A dictionary with ids as keys and the configuration dictionary as values : dict[id][parameter]->value - 2. A dictionary with ids as keys and the macro average dictionary as values: dict[id][label][metric]->value - """ - result_sets = defaultdict(list) - # Iterate over all files in the directory - if directory == "": - directory = os.getcwd() - for filename in os.listdir(directory): - file_path = os.path.join(directory, filename) - result_sets[filename.split(".")[0]].append(file_path) - - # Store the evaluation results in the dictionary form - macro_avgs = {} - configs = {} - for key, value in result_sets.items(): - macro_avg = defaultdict(lambda: defaultdict(float)) - i = 0 - for file in value: - if file.endswith(".csv"): - i += 1 - with open(file, "r") as f: - csv_reader = csv.DictReader(f) - for row in csv_reader: - if target_labels and row['Label'] not in target_labels: - continue - macro_avg[row['Label']]['Accuracy'] += float(row['Accuracy']) - macro_avg[row['Label']]['Precision'] += float(row['Precision']) - macro_avg[row['Label']]['Recall'] += float(row['Recall']) - macro_avg[row['Label']]['F1-Score'] += float(row['F1-Score']) - - if file.endswith(".yml"): - with open(file, "r") as f: - data = yaml.safe_load(f) - # delete unnecessary items - del data['block_guids_train'] - del data['block_guids_valid'] - del data['num_splits'] - configs[key] = data - - # Calculate macro averages - for label, scores in macro_avg.items(): - for prf_metric in scores: - scores[prf_metric] = scores[prf_metric] / float(i) - - # Add overall macro averages for all labels for each set. - num_classes = len(macro_avg) - macro_avg["overall"] = defaultdict(float) - for label, scores in macro_avg.items(): - if label != "overall": - for prf_metric in scores: - macro_avg["overall"][prf_metric] += scores[prf_metric] / num_classes - - macro_avgs[key] = macro_avg - - return configs, macro_avgs - - -def get_inverse_configs(configs): - """ - Get inverse dictionary for configurations that allow user to find IDs from configurations. - :param configs: A dictionary with IDs as keys and a dictionary with configurations as values. - :return: A nested dictionary with parameter name as 1st keys, parameter value as 2nd key and a set of IDs as values. - """ - inverse_dict = defaultdict(lambda: defaultdict(set)) - for key, val in configs.items(): - for k, v in val.items(): - inverse_dict[k][v].add(key) - - return inverse_dict - - -def get_grid(configs): - """ - Get grid of configurations used. - :param configs: A dictionary with IDs as keys and a dictionary with configurations as values. - :return: A dictionary with parameter name as keys and list of parameter used in grid search as value. - """ - grid = defaultdict(set) - for value in configs.values(): - for k, v in value.items(): - grid[k].add(v) - - for key, val in grid.items(): - grid[key] = list(val) - - return grid - - -def get_labels(macroavgs): - """ - Get list of labels. 
This is needed because some sets doesn't have results for some labels. - :param macroavgs: A dictionary of macro averages of results that was got from get_configs_and_macroavgs function. - :return: A list of labels - """ - labels = set() - for key, val in macroavgs.items(): - labels.update(val.keys()) - return list(labels) - - -def get_pairs_to_compare(grid, inverse_configs, variable): - """ - Get a list of pairs(lists of IDs) where all configurations are the same except for one given variable. - :param grid: Grid of configurations used in this experiment - :param inverse_configs: A dictionary that allows user to search IDs from configurations. - :param variable: the variable parameter. - :return: A list of pairs(lists of IDs) - """ - - # Delete variable key from grid and inverse_configs dictionary - del grid[variable] - del inverse_configs[variable] - # Form all possible configurations of parameters from grid and store it as a list of dictionary form. - conf_dicts = [dict(zip(grid.keys(), config)) for config in list(product(*grid.values()))] - - # Get all the possible lists of pairs(IDs) using inverse_configs dictionary and intersection of them for every configuration. - pair_list = [] - for conf_dict in conf_dicts: - list_of_sets = [inverse_configs[param_name][val] for param_name, val in conf_dict.items()] - - # Get intersection of sets of IDs for given configurations - intersection_result = list_of_sets[0] - # Iterate over the remaining sets and find the intersection - for s in list_of_sets[1:]: - intersection_result = intersection_result.intersection(s) - - pair_list.append(list(intersection_result)) - - return pair_list - - -def compare_pairs(list_of_pairs, macroavgs, configs, grid, variable, label_to_show, variable_values, interactive_plots=True): - """ - For list of pairs got from get_pairs_to_compare function, compare each pair by plotting bar graphs for given label. - :param list_of_pairs: got from get_pairs_to_compare function for given variable - :param macroavgs: - :param configs: - :param grid: - :param variable: - :param label_to_show: User choice of label (including overall) to show scores in the graph. 
- """ - - # Form parameter to color dictionary for consistency in color across all pairs - param_to_color = dict((str(value), f'C{i}') for i, value in enumerate(grid[variable])) - - html = 'Comparison of pairs' - - # For each pair, form a data dictionary as data = { ID1: [accuracy, precision, recall, f1], ...} - # and plot a bar graph - fig, ax = plt.subplots() - all_ps = [[] for _ in range(len(list_of_pairs[0]))] - all_rs = [[] for _ in range(len(list_of_pairs[0]))] - for pair in list_of_pairs: - # re-order the pair to show the variable values in the same order as in the grid - ordered_pair = [None] * len(variable_values) - for i, value in enumerate(variable_values): - for exp_id in pair: - if configs[exp_id][variable] == value: - ordered_pair[i] = exp_id - scores = macroavgs[ordered_pair[0]][label_to_show] - data = defaultdict(list) - metric_list = ['Avg Accuracy', 'Avg Precision', 'Avg Recall', 'Avg F1-Score'] - for i, exp_id in enumerate(ordered_pair): - for metric, score in scores.items(): - if label_to_show in macroavgs[exp_id]: - data[exp_id].append(macroavgs[exp_id][label_to_show][metric]) - if 'preci' in metric.lower(): - all_ps[i].append(macroavgs[exp_id][label_to_show][metric]) - if 'recal' in metric.lower(): - all_rs[i].append(macroavgs[exp_id][label_to_show][metric]) - else: - data[exp_id].append(0.0) - data = dict(data) - - # plot a bar graph - x = np.arange(len(metric_list)) # the label locations - l = len(data) # length of data (it varies by set) - width = 1 / (l + 1) # the width of the bars - multiplier = 0 - - if l != 0: - for exp_id, scores in data.items(): - id_variable = str(variable) + ": " + str(configs[exp_id][variable]) - offset = width * multiplier - rects = ax.bar(x + offset, scores, width, label=id_variable, color=param_to_color[str(configs[exp_id][variable])]) - ax.bar_label(rects, fmt='%.6s', fontsize='small', rotation='vertical', padding=3) - multiplier += 1 - - # Add some text for labels, title and custom x-axis tick labels, etc. - ax.set_ylabel('Score') - ax.set_title(str(label_to_show)) - ax.set_xticks(x + width * (l - 1) / 2, metric_list) - ax.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5)) - ax.set_ylim(0.0, 1.15) - # Show information on fixed parameters. - configs[exp_id].pop(variable) - string_configs = "" - for k, v in configs[exp_id].items(): - string_configs += str(k) + ": " + str(v) + "\n" - ax.text(0.99, 0.97, string_configs, - verticalalignment='bottom', horizontalalignment='right', - transform=ax.transAxes, - color='green', fontsize='small') - - if interactive_plots: - plt.show() - else: - temp_io_stream = BytesIO() - fig.savefig(temp_io_stream, format='png', bbox_inches='tight') - html += f'
' - plt.cla() - for i, var_val in enumerate(variable_values): - if interactive_plots: - print(f'{var_val}\t{round(mean(all_ps[i]), 4)}\t{round(mean(all_rs[i]), 4)}') - else: - html += f'
{var_val}\t{round(mean(all_ps[i]), 4)}\t{round(mean(all_rs[i]), 4)}
' - - if not interactive_plots: - if label_to_show == 'overall': - labels = set() - for _, scores_by_label in macroavgs.items(): - labels.update(scores_by_label.keys()) - label_to_show = '+'.join(sorted(labels)) - html += '' - with open(f'results-comparison-{variable}-{label_to_show}.html', 'w') as f: - f.write(html) - - -def user_input_variable(grid): - """ - A function to receive user input on which parameter to vary. - :param grid: dictionary of variable names and list of values. - :return: user choice among parameter names in grid. - """ - try: - choice = str(input("\nEnter one parameter to vary from " + str(list(grid.keys())) + "\n:")) - if choice in grid.keys(): - return choice - else: - raise ValueError("Invalid argument for variable. Please enter one of ", list(grid.keys())) - except ValueError: - raise argparse.ArgumentTypeError("Invalid argument for variable. Please enter one of ", list(grid.keys())) - - -def user_input_label(label_list): - """ - A function to receive user input on which label to plot and show the scores. - :param label_list: - :return: user choice among label names in label_list. - """ - try: - choice = str(input("\nEnter a label for comparing results: " + str(label_list) + "\n:")) - if choice in label_list: - return choice - else: - raise ValueError("Invalid argument for variable. Please enter one of ", label_list) - except ValueError: - raise argparse.ArgumentTypeError("Invalid argument for variable. Please enter one of ", - label_list) - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser() - parser.add_argument( - "directory", - type=str, - help="Directory with result and configuration files", - default="", - ) - parser.add_argument( - '-l', '--label', - default='overall', - action='store', - nargs='?', - help='Pick a label to compare, default is overall, meaning all labels are plotted' - ) - parser.add_argument( - '-L', '--target-labels', - default='B S I N Y C R'.split(), - nargs='+', - help='List of labels to calculate macro average. Default to the "source" labels used in 4-way "post" remapping' - ) - parser.add_argument( - '-k', '--config-key', - default=None, - action='store', - nargs='?', - help='Pick a config key to "pin" the comparison to. ' - 'When this is not set, the program runs in "interactive" mode, ' - 'where the user is prompted to pick a key and a label to compare.' - ) - parser.add_argument( - '-i', '--interactive-plots', - action='store_true', - help='Flag to show plots in interactive mode. If not set, the program will save all the plots in a html file.' - ) - - args = parser.parse_args() - - # Get necessary dictionaries and lists for processing the comparison. - configs, macroavgs = get_configs_and_macroavgs(args.directory, args.target_labels) - label_list = get_labels(macroavgs) - inverse_configs = get_inverse_configs(configs) - grid = get_grid(configs) - if args.config_key is None: - # Get user inputs and prepare the list of pairs - choice_variable = user_input_variable(grid) - choice_label = user_input_label(label_list) - else: - if args.config_key in grid: - choice_variable = args.config_key - else: - raise argparse.ArgumentTypeError("Invalid argument for variable. Please enter one of ", list(grid.keys())) - if args.label in label_list: - choice_label = args.label - else: - raise argparse.ArgumentTypeError("Invalid argument for label. 
Please enter one of ", label_list) - variable_values = sorted(grid[choice_variable].copy()) - list_of_pairs = get_pairs_to_compare(grid.copy(), inverse_configs, choice_variable) - # Show the comparison results of pairs in bar graphs - compare_pairs(list_of_pairs, macroavgs, configs.copy(), grid, choice_variable, choice_label, variable_values, interactive_plots=args.interactive_plots) \ No newline at end of file diff --git a/scripts/dev.visualize.py b/visualize/mmif-timepoint-view.py similarity index 100% rename from scripts/dev.visualize.py rename to visualize/mmif-timepoint-view.py diff --git a/visualize/stitching-gridsearch-results.py b/visualize/stitching-gridsearch-results.py new file mode 100644 index 0000000..63e96ed --- /dev/null +++ b/visualize/stitching-gridsearch-results.py @@ -0,0 +1,133 @@ +""" +Script to generate hiplot-based parallel coordinates plots from the results of a grid search on the stitcher app. + +""" +import csv +import json +import pathlib +import sys +from collections import defaultdict + +import hiplot as hip + +import modeling.config.bins + +# from modeling import gridsearch + +hyperparams = {'tfMinTPScore': hip.ValueDef(value_type=hip.ValueType.NUMERIC, colormap='interpolateTurbo'), + 'tfMinTFScore': hip.ValueDef(value_type=hip.ValueType.NUMERIC, colormap='interpolateTurbo'), + 'tfLabelMapFn': hip.ValueDef(value_type=hip.ValueType.CATEGORICAL), + 'tfMinNegTFDuration': hip.ValueDef(value_type=hip.ValueType.NUMERIC, colormap='interpolateTurbo'), + 'tfMinTFDuration': hip.ValueDef(value_type=hip.ValueType.NUMERIC, colormap='interpolateTurbo'), + 'tfAllowOverlap': hip.ValueDef(value_type=hip.ValueType.CATEGORICAL)} +results_dir = pathlib.Path(sys.argv[1]) + +all_lbl = 'AVG' +all_binned_lbl = 'AVGBIN' +# only used when stitcher's labelMap is a identity function (no postbin) +binname = sys.argv[2] if len(sys.argv) > 2 else '!' 
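+# if the given name is not a key in binning_schemes (such as the '!' default above),
+# the lookup below falls back to {} and per-label scores are reported without binning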
+ +bins_of_interest = modeling.config.bins.binning_schemes.get(binname, {}) +raw_lbls = "BSWLOMINEPYKUGTFCR" +lbls = [all_lbl] + list(raw_lbls) + list(bins_of_interest.keys()) +inverse_bins = {v: k for k, vs in bins_of_interest.items() for v in vs} +data = defaultdict(list) + + +def is_identity(d): + for key, value in d.items(): + if key != value: + return False + return True + + +for exp in results_dir.iterdir(): + if exp.is_dir() and (exp / 'results.csv').exists() and (exp / 'appConfiguration.json').exists(): + configs = json.load((exp / 'appConfiguration.json').open()) + # when there's actual postbinning, + if not is_identity(configs['tfLabelMap']): + inverse_bins = configs['tfLabelMap'] + lbls = [all_lbl] + list(raw_lbls) + list(set(configs['tfLabelMap'].values())) + bins_of_interest = defaultdict(list) + for k, v in configs['tfLabelMap'].items(): + bins_of_interest[v].append(k) + ## some "fixed" parameters + # if configs['tfAllowOverlap']: + # continue + # if configs['tfLabelMapFn'] == 'sum': + # continue + # if 1 < configs['tfMinNegTFDuration'] < 1000: # TP samplerate is 1000, so skip values under + # continue + + base_params = {hp: configs[hp] for hp in hyperparams} + + exp_raw_scores = {} + exp_bin_scores = defaultdict(lambda: {'P': {'filtered': [], 'stitched': []}, + 'R': {'filtered': [], 'stitched': []}, + 'F': {'filtered': [], 'stitched': []}}) + with (exp / 'results.csv').open() as f: + reader = csv.DictReader(f) + score_dict = {row['labels']: float(row['@@@ALL@@@']) for row in reader} + for lbl in lbls: + for met in 'PRF': + if f'{lbl} {met} STITCHED' in score_dict: + raw_score = score_dict.get(f'{lbl} {met} FILTERED', 0.0) + binarized_score = score_dict.get(f'{lbl} {met} STITCHED', 0.0) + exp_raw_scores[f'{lbl}-{met}-filtered'] = raw_score + exp_raw_scores[f'{lbl}-{met}-stitched'] = binarized_score + exp_raw_scores[f'{lbl}-{met}-diff'] = binarized_score - raw_score + exp_bin_scores[all_lbl][met]['filtered'].append(raw_score) + exp_bin_scores[all_lbl][met]['stitched'].append(binarized_score) + if lbl in inverse_bins: + exp_bin_scores[inverse_bins[lbl]][met]['filtered'].append(raw_score) + exp_bin_scores[inverse_bins[lbl]][met]['stitched'].append(binarized_score) + exp_bin_scores[all_binned_lbl][met]['filtered'].append(raw_score) + exp_bin_scores[all_binned_lbl][met]['stitched'].append(binarized_score) + exp_bin_avg_scores = {} + for lbl, scores in exp_bin_scores.items(): + for met, cond in scores.items(): + raw_avg = sum(cond['filtered']) / len(cond['filtered']) + map_avg = sum(cond['stitched']) / len(cond['stitched']) + diff_avg = map_avg - raw_avg + exp_bin_avg_scores[f'{lbl}-{met}-filtered'] = raw_avg + exp_bin_avg_scores[f'{lbl}-{met}-stitched'] = map_avg + exp_bin_avg_scores[f'{lbl}-{met}-diff'] = diff_avg + + for d in [exp_raw_scores, exp_bin_avg_scores]: + for lbl_and_type, score in d.items(): + # if score < 0: + # continue + params = base_params.copy() + l, m, c = lbl_and_type.rsplit('-', 2) + t = f'{m}-{c}' + params['score'] = score + if l in bins_of_interest: + bin_num = list(bins_of_interest.keys()).index(l) + l_for_sorting = f'{bin_num}.{l}' + elif l in inverse_bins: + bin_num = list(bins_of_interest.keys()).index(inverse_bins[l]) + l_for_sorting = f'{bin_num}@{l}' + else: + l_for_sorting = l + print(l, exp_bin_scores) + if len(bins_of_interest) != 0: # meaning, a binning is used, then skip uninteresting labels + # the `|exp_bin_scores|==1` means the binning was actually a postbinning, in this case there's no "uninteresting" labels + if len(exp_bin_scores) > 1 and 
l != all_binned_lbl: + continue + else: + if l == all_lbl: # when a binning is used, skip total avg, but keep bin-level avg + continue + params['label'] = l_for_sorting + data[t].append(params) + +# print(data) + +for k, v in data.items(): + out_fname = f'stitcher-results-{binname}-{k}.html' + print(k, f'({len(v)} items)', '>>', out_fname) + p = hip.Experiment.from_iterable(v) + p.parameters_definition = hyperparams + p.parameters_definition['score'] = hip.ValueDef(value_type=hip.ValueType.NUMERIC, colormap='interpolateTurbo') + p.colorby = 'score' + with open(out_fname, 'w') as out_f: + p.to_html(out_f) diff --git a/visualize/training-gridsearch-results.py b/visualize/training-gridsearch-results.py new file mode 100644 index 0000000..f22020e --- /dev/null +++ b/visualize/training-gridsearch-results.py @@ -0,0 +1,183 @@ +import csv +import pathlib +import sys +from collections import defaultdict + +import hiplot as hip +import yaml + +import modeling.config.bins +import modeling.gridsearch + +hyperparams = modeling.gridsearch.clss_param_keys + +results_dir = pathlib.Path(sys.argv[1]) + +avg_alllbl = '!AVG' +avg_allbin = '!AVGBIN' + +labels_with_isseus = [ + 'U', # no training instances + 'K', # no evaluation instances + 'W', # almost no instances both in training and evaluation +] + +bin_scheme_name = sys.argv[2] if len(sys.argv) > 2 else 'nomap' +bins_of_interest = modeling.config.bins.binning_schemes.get(bin_scheme_name, {}) +for k, vs in bins_of_interest.items(): + no_subtypes = [v for v in vs if ':' not in v] + bins_of_interest[k] = no_subtypes + + +def backbone_sorter(): + import re + backbone_scr = pathlib.Path(modeling.gridsearch.__file__).parent / 'backbones.py' + backbone_names = [] + with open(backbone_scr) as backbone_f: + for line in backbone_f: + # regex match for ` name = "vgg16"` or ` name = 'bn_vgg16'` + match = re.match(r'\s+name\s*=\s*["\']([^"\']+)["\']', line) + if match: + backbone_names.append(match.group(1)) + return backbone_names + +raw_lbls = modeling.FRAME_TYPES +lbls = [avg_alllbl] + raw_lbls + list(bins_of_interest.keys()) +inverse_bins = {v: k for k, vs in bins_of_interest.items() for v in vs} +data = defaultdict(list) + + +def is_identity(d): + for key, value in d.items(): + if key != value: + return False + return True + + +experiments = defaultdict(set) +# training results are stored as ...{csv,yml} +for f in results_dir.iterdir(): + exp_id, ext = f.name.rsplit('.', 1) + experiments[exp_id].add(ext) + +exps = [exp_id for exp_id, exts in experiments.items() if 'csv' in exts and 'yml' in exts] + + +def clean_config(config, prebin_name=None): + """ + Clean up the configuration found in a yml file with more human friendly names. 
+ """ + bgtrain = list(set(config['block_guids_train'])) + bgvalid = list(set(config['block_guids_valid'])) + config['block_guids_train'] = f'{len(bgtrain):04}@{hash(str(sorted(bgtrain)))}' + config['block_guids_valid'] = f'{len(bgvalid):04}@{hash(str(sorted(bgvalid)))}' + + # a short string name of the prebin can be passed as an argument or can be generated from dictionary in the config + if prebin_name: + config['prebin'] = prebin_name + elif 'prebin' in config: + config['prebin'] = f'{len(config["prebin"])}way@{hash(str(config["prebin"]))}' + else: + config['prebin'] = 'None' + + config['posenc'] = config['pos_vec_coeff'] > 0 + del config['pos_vec_coeff'] + + del config['split_size'] + return config + +img_encer_sorter = backbone_sorter() +for exp in exps: + configs = yaml.safe_load((results_dir / f'{exp}.yml').open()) + # TODO (krim @ 11/6/24): add handling of prebins when we're using it (currently not using) + # if 'prebin' in configs: + configs = clean_config(configs) + + # skip uninsteresting configurations + if configs['pos_unit'] == 1000: + continue + + + base_params = {hp: configs[hp] for hp in hyperparams if hp in configs} + # then add back all configs keys that were renamed in `clean_config` + for k, v in configs.items(): + if k not in base_params: + base_params[k] = v + + exp_raw_scores = defaultdict(lambda: {'P': 0.0, 'R': 0.0, 'F': 0.0}) + exp_bin_scores = defaultdict(lambda: {'P': [], 'R': [], 'F': []}) + + with (results_dir / f'{exp}.csv').open() as f: + reader = csv.DictReader(f) + for row in reader: + if 'Confusion Matrix' in row['Model_Name'] or not row: + break + lbl = row['Label'] + if lbl == 'Overall': + lbl = avg_alllbl + for met in 'Precision Recall F1-Score'.split(): + exp_raw_scores[lbl][met[0]] = float(row[met]) + if lbl in inverse_bins: + exp_bin_scores[inverse_bins[lbl]][met[0]].append(float(row[met])) + exp_bin_scores[avg_allbin][met[0]].append(float(row[met])) + for binname, scores in exp_bin_scores.items(): + for met, scorelist in scores.items(): + exp_raw_scores[binname][met] = sum(scorelist) / len(scorelist) + + for lbl, scores in exp_raw_scores.items(): + if lbl in labels_with_isseus: + continue + for met, score in scores.items(): + params = base_params.copy() + params['score'] = score + # when there's any bin (post bin or average bin), we prefix bin name + if lbl in bins_of_interest: + bin_num = list(bins_of_interest.keys()).index(lbl) + l_for_sorting = f'{bin_num}.{lbl}({"".join(bins_of_interest[lbl])})' + # AND label as well + elif lbl in inverse_bins: + # except for that the bin is a singleton + if bins_of_interest[inverse_bins[lbl]] == [lbl]: + continue + bin_num = list(bins_of_interest.keys()).index(inverse_bins[lbl]) + l_for_sorting = f'{bin_num}@{lbl}' + # if the label wasn't in the bin dicts, it's an "uninteresting" one + else: + l_for_sorting = lbl + # if average binning is used, + if len(bins_of_interest) != 0: + # then skip uninteresting labels + if lbl != avg_allbin: + continue + # when "binary" binning, skip the average + if len(bins_of_interest) == 1 and lbl == avg_allbin: + continue + # else: + # if lbl == avg_alllbl: # when a binning is used, skip total avg, but keep bin-level avg + # continue + params['label'] = l_for_sorting + params['img_enc_name'] = f'{img_encer_sorter.index(params["img_enc_name"]):03}.{params["img_enc_name"]}' + + # remove unused params + for unused in ('pos_unit', 'pos_length', 'pos_abs_th_end', 'pos_abs_th_front', 'prebin'): + params.pop(unused) + + data[met].append(params) + +for k, v in data.items(): + 
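+    # one parallel-coordinates HTML and one CSV dump are written per metric key ('P', 'R', 'F') in data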
+    out_html_fname = f'{results_dir.name}-gridsearch-results-{bin_scheme_name}-{k}.html'
+    out_csv_fname = f'{results_dir.name}-gridsearch-results-{bin_scheme_name}-{k}.csv'
+    print(k, f'({len(v)} items)', '>>', out_html_fname)
+    p = hip.Experiment.from_iterable(v)
+    # p.parameters_definition = hyperparams
+    for hp in hyperparams:
+        if hp == 'score' or hp.startswith('num_'):
+            p.parameters_definition[hp] = hip.ValueDef(value_type=hip.ValueType.NUMERIC, colormap='interpolateTurbo')
+        elif hp in ('img_enc_name', 'block_guids_train', 'block_guids_valid', 'prebin'):
+            p.parameters_definition[hp] = hip.ValueDef(value_type=hip.ValueType.CATEGORICAL, colormap='interpolateViridis')
+    p.colorby = 'score'
+    with open(out_html_fname, 'w') as out_f:
+        p.to_html(out_f)
+    print(k, f'({len(v)} items)', '>>', out_csv_fname)
+    with open(out_csv_fname, 'w') as out_f:
+        p.to_csv(out_f)
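
Note on the train.py hunk above: the old num_splits control is replaced by split_size, so each fold holds out split_size videos from the sorted GUID list and the number of folds follows from the corpus size; when split_size is at least the number of available videos, the code instead trains once and validates against the fixed validation set. The snippet below is a minimal standalone sketch of that slicing pattern only; the helper name make_folds and the GUID values are hypothetical and are not part of the patch.

def make_folds(all_guids, split_size):
    """Yield (train_guids, validation_guids) pairs, one fold per slice of split_size videos."""
    ordered = sorted(all_guids)
    for start in range(0, len(ordered), split_size):
        validation = set(ordered[start:start + split_size])
        yield set(ordered) - validation, validation

# Example: 5 hypothetical videos with split_size=2 yield 3 folds
# (the last fold holds out the one remaining video).
guids = ['guid-a', 'guid-b', 'guid-c', 'guid-d', 'guid-e']
for j, (train, valid) in enumerate(make_folds(guids, split_size=2)):
    print(f'fold {j}: train on {len(train)} videos, validate on {sorted(valid)}')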