Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release candidate for v4.3 #93

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 9 additions & 37 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,32 +24,6 @@
default_model_storage = Path(__file__).parent / 'modeling/models'


def _extract_frames_as_images(video_document, framenums, as_PIL: bool = False):
    """
    Extract the frames at ``framenums`` from a video document in a single pass.

    ``extract_frames_as_images`` in mmif.utils.video_document_helper is using a slower
    iteration over the framenums. This method is a faster alternative, and monkeypatches
    the one in the SDK (see the assignment below).

    :param video_document: MMIF VideoDocument to read frames from
    :param framenums: frame numbers to extract; assumed sorted in ascending
        order, since frames are matched in one sequential pass — TODO confirm
        callers always pass sorted lists
    :param as_PIL: when True, wrap each frame as a ``PIL.Image`` (channel order
        reversed from OpenCV's BGR); otherwise return raw frame arrays
    :return: list of extracted frames, in ``framenums`` order
    """
    if as_PIL:
        from PIL import Image
    frames = []
    video = vdh.capture(video_document)
    # hoisted out of the loop: the frame count is loop-invariant
    total_frames = video_document.get_property(vdh.FRAMECOUNT_DOCPROP_KEY)
    # advance a cursor over framenums instead of popping from the list, so the
    # caller's ``framenums`` argument is no longer mutated as a side effect
    next_idx = 0
    cur_f = 0
    while next_idx < len(framenums) and cur_f <= total_frames:
        ret, frame = video.read()
        if not ret:
            break
        if cur_f == framenums[next_idx]:
            # frame[:, :, ::-1] flips BGR (OpenCV) to RGB for PIL
            frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
            next_idx += 1
        cur_f += 1
    return frames


vdh.extract_frames_as_images = _extract_frames_as_images


class SwtDetection(ClamsApp):

def __init__(self, preconf_fname: str = None, log_to_file: bool = False) -> None:
Expand All @@ -61,14 +35,11 @@ def __init__(self, preconf_fname: str = None, log_to_file: bool = False) -> None
self.logger.addHandler(fh)

def _appmetadata(self):
    # Intentionally a no-op: the app metadata is provided by ``metadata.py``
    # in this directory instead of being constructed here, so this method
    # must stay as a do-nothing "pass".
    # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._load_appmetadata
    pass

def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
# possible bug here, as the configuration will be updated with parameters that are not defined in the
# app metadata but are passed at run time.
# parameters here are a "refined" dict, so hopefully their values are properly validated and cast at this point.
configs = {**self.preconf, **parameters}
for k, v in configs.items():
self.logger.debug(f"Final Configuraion: {k} :: {v}")
Expand All @@ -83,14 +54,14 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
self.logger.info(f"Initiating classifier with {configs['model_file']}")
if self.logger.isEnabledFor(logging.DEBUG):
configs['logger_name'] = self.logger.name
self.classifier = classify.Classifier(**configs)
self.stitcher = stitch.Stitcher(**configs)
classifier = classify.Classifier(**configs)
stitcher = stitch.Stitcher(**configs)
if self.logger.isEnabledFor(logging.DEBUG):
self.logger.debug(f"Classifier initiation took {time.perf_counter() - t} seconds")

new_view: View = mmif.new_view()
self.sign_view(new_view, parameters)
self.logger.info('Minimum time frame score: %s', self.stitcher.min_timeframe_score)
self.logger.info('Minimum time frame score: %s', stitcher.min_timeframe_score)

vds = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
if not vds:
Expand All @@ -114,7 +85,8 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
extracted = vdh.extract_frames_as_images(vd, sampled, as_PIL=True)

self.logger.debug(f"Seeking time: {time.perf_counter() - t:.2f} seconds\n")
predictions = self.classifier.classify_images(extracted, positions, total_ms)
# the last `total_ms` (as a fixed value) only works since the app is processing only one video at a time
predictions = classifier.classify_images(extracted, positions, total_ms)
if self.logger.isEnabledFor(logging.DEBUG):
self.logger.debug(f"Processing took {time.perf_counter() - t} seconds")

Expand All @@ -135,8 +107,8 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
return mmif

new_view.new_contain(AnnotationTypes.TimeFrame,
document=vd.id, timeUnit='milliseconds', labelset=list(self.stitcher.stitch_label.keys()))
timeframes = self.stitcher.create_timeframes(predictions)
document=vd.id, timeUnit='milliseconds', labelset=list(stitcher.stitch_label.keys()))
timeframes = stitcher.create_timeframes(predictions)
for tf in timeframes:
timeframe_annotation = new_view.new_annotation(AnnotationTypes.TimeFrame)
timeframe_annotation.add_property("label", tf.label),
Expand Down
60 changes: 56 additions & 4 deletions modeling/config/classifier.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
model_file: "modeling/models/20240126-180026.convnext_lg.kfold_000.pt"
model_config_file: "modeling/models/20240126-180026.convnext_lg.kfold_config.yml"
model_file: "modeling/models/20240409-091401.convnext_lg.kfold_013.pt"
model_config_file: "modeling/models/20240409-091401.convnext_lg.kfold_config.yml"

# Milliseconds between sampled frames
sampleRate: 1000
Expand Down Expand Up @@ -90,5 +90,57 @@ postbins:
- "G"
- 'T'
- 'F'


20240409-091401.convnext_lg.kfold_013:
bars:
- "B"
slate:
- "S"
- "S:H"
- "S:C"
- "S:D"
- "S:G"
other_opening:
- "W"
- "L"
- "O"
- "M"
chyron:
- "I"
- "N"
- "Y"
credit:
- "C"
- "R"
other_text:
- "E"
- "K"
- "G"
- 'T'
- 'F'
20240409-093229.convnext_tiny.kfold_012:
bars:
- "B"
slate:
- "S"
- "S:H"
- "S:C"
- "S:D"
- "S:G"
other_opening:
- "W"
- "L"
- "O"
- "M"
chyron:
- "I"
- "N"
- "Y"
credit:
- "C"
- "R"
other_text:
- "E"
- "K"
- "G"
- 'T'
- 'F'
4 changes: 2 additions & 2 deletions modeling/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ def encode_position(self, cur_time, tot_time, img_vec):
pos_vec = torch.tensor([pos]).to(img_vec.dtype)
return torch.concat((img_vec, pos_vec))
elif self.pos_encoder == 'sinusoidal-add':
return torch.add(img_vec, self.pos_vec_lookup[(cur_time / self.pos_unit).round()])
return torch.add(img_vec, self.pos_vec_lookup[round(cur_time / self.pos_unit)])
elif self.pos_encoder == 'sinusoidal-concat':
return torch.concat((img_vec, self.pos_vec_lookup[(cur_time / self.pos_unit).round()]))
return torch.concat((img_vec, self.pos_vec_lookup[round(cur_time / self.pos_unit)]))
else:
return img_vec

Expand Down
24 changes: 24 additions & 0 deletions modeling/models/20240409-091401.convnext_lg.kfold_013.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Model_Name,Label,Accuracy,Precision,Recall,F1-Score
convnext_lg,B,0.9989030957221985,0.976190447807312,0.9534883499145508,0.9647058844566345
convnext_lg,S,0.9996343851089478,1.0,0.8888888955116272,0.9411764740943909
convnext_lg,S:H,1.0,0.0,0.0,0.0
convnext_lg,S:C,1.0,0.0,0.0,0.0
convnext_lg,S:D,1.0,0.0,0.0,0.0
convnext_lg,S:B,1.0,0.0,0.0,0.0
convnext_lg,S:G,1.0,0.0,0.0,0.0
convnext_lg,W,1.0,0.0,0.0,0.0
convnext_lg,L,0.995978057384491,0.7894737124443054,0.6818181872367859,0.7317073345184326
convnext_lg,O,0.9992687106132507,0.75,0.75,0.75
convnext_lg,M,0.9879341721534729,0.23333333432674408,0.4117647111415863,0.2978723347187042
convnext_lg,I,0.9893966913223267,0.9913793206214905,0.8041958212852478,0.8880308866500854
convnext_lg,N,1.0,0.0,0.0,0.0
convnext_lg,E,0.9926874041557312,0.0,0.0,0.0
convnext_lg,P,0.937111496925354,0.91862952709198,0.9884792566299438,0.952275276184082
convnext_lg,Y,1.0,0.0,0.0,0.0
convnext_lg,K,1.0,0.0,0.0,0.0
convnext_lg,G,0.9802559614181519,0.0,0.0,0.0
convnext_lg,T,1.0,0.0,0.0,0.0
convnext_lg,F,0.9934186339378357,0.5666666626930237,0.7727272510528564,0.6538461446762085
convnext_lg,C,0.9897623658180237,0.4285714328289032,0.36000001430511475,0.3913043439388275
convnext_lg,R,0.9992687106132507,0.6666666865348816,0.6666666865348816,0.6666666865348816
convnext_lg,NEG,0.9265082478523254,0.9066901206970215,0.7767722606658936,0.8367181420326233
Binary file not shown.
33 changes: 33 additions & 0 deletions modeling/models/20240409-091401.convnext_lg.kfold_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
num_splits: 20
num_epochs: 10
num_layers: 4
pos_enc_name: sinusoidal-concat
max_input_length: 5640000
pos_unit: 60000
pos_enc_dim: 256
dropouts: 0.1
img_enc_name: convnext_lg
block_guids_train:
- cpb-aacip-254-75r7szdz
block_guids_valid:
- cpb-aacip-254-75r7szdz
- cpb-aacip-259-4j09zf95
- cpb-aacip-526-hd7np1xn78
- cpb-aacip-75-72b8h82x
- cpb-aacip-fe9efa663c6
- cpb-aacip-f5847a01db5
- cpb-aacip-f2a88c88d9d
- cpb-aacip-ec590a6761d
- cpb-aacip-c7c64922fcd
- cpb-aacip-f3fa7215348
- cpb-aacip-f13ae523e20
- cpb-aacip-e7a25f07d35
- cpb-aacip-ce6d5e4bd7f
- cpb-aacip-690722078b2
- cpb-aacip-e649135e6ec
- cpb-aacip-15-93gxdjk6
- cpb-aacip-512-4f1mg7h078
- cpb-aacip-512-4m9183583s
- cpb-aacip-512-4b2x34nt7g
- cpb-aacip-512-3n20c4tr34
- cpb-aacip-512-3f4kk9534t
24 changes: 24 additions & 0 deletions modeling/models/20240409-093229.convnext_tiny.kfold_012.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Model_Name,Label,Accuracy,Precision,Recall,F1-Score
convnext_tiny,B,1.0,1.0,1.0,1.0
convnext_tiny,S,1.0,1.0,1.0,1.0
convnext_tiny,S:H,1.0,0.0,0.0,0.0
convnext_tiny,S:C,1.0,0.0,0.0,0.0
convnext_tiny,S:D,1.0,0.0,0.0,0.0
convnext_tiny,S:B,1.0,0.0,0.0,0.0
convnext_tiny,S:G,1.0,0.0,0.0,0.0
convnext_tiny,W,1.0,0.0,0.0,0.0
convnext_tiny,L,0.9955801367759705,0.6666666865348816,0.8571428656578064,0.75
convnext_tiny,O,1.0,0.0,0.0,0.0
convnext_tiny,M,0.9933701753616333,0.4000000059604645,0.4000000059604645,0.4000000059604645
convnext_tiny,I,0.9955801367759705,1.0,0.692307710647583,0.8181818127632141
convnext_tiny,N,1.0,0.0,0.0,0.0
convnext_tiny,E,1.0,0.0,0.0,0.0
convnext_tiny,P,0.8972375392913818,0.8815286755561829,1.0,0.937034547328949
convnext_tiny,Y,1.0,0.0,0.0,0.0
convnext_tiny,K,1.0,0.0,0.0,0.0
convnext_tiny,G,0.9988950490951538,0.0,0.0,0.0
convnext_tiny,T,1.0,0.0,0.0,0.0
convnext_tiny,F,1.0,0.0,0.0,0.0
convnext_tiny,C,0.9977900385856628,0.8333333134651184,1.0,0.9090909361839294
convnext_tiny,R,0.9955801367759705,0.0,0.0,0.0
convnext_tiny,NEG,0.8983425498008728,0.9803921580314636,0.3546099364757538,0.5208333134651184
Binary file not shown.
33 changes: 33 additions & 0 deletions modeling/models/20240409-093229.convnext_tiny.kfold_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
num_splits: 20
num_epochs: 10
num_layers: 4
pos_enc_name: sinusoidal-concat
max_input_length: 5640000
pos_unit: 60000
pos_enc_dim: 256
dropouts: 0.1
img_enc_name: convnext_tiny
block_guids_train:
- cpb-aacip-254-75r7szdz
block_guids_valid:
- cpb-aacip-254-75r7szdz
- cpb-aacip-259-4j09zf95
- cpb-aacip-526-hd7np1xn78
- cpb-aacip-75-72b8h82x
- cpb-aacip-fe9efa663c6
- cpb-aacip-f5847a01db5
- cpb-aacip-f2a88c88d9d
- cpb-aacip-ec590a6761d
- cpb-aacip-c7c64922fcd
- cpb-aacip-f3fa7215348
- cpb-aacip-f13ae523e20
- cpb-aacip-e7a25f07d35
- cpb-aacip-ce6d5e4bd7f
- cpb-aacip-690722078b2
- cpb-aacip-e649135e6ec
- cpb-aacip-15-93gxdjk6
- cpb-aacip-512-4f1mg7h078
- cpb-aacip-512-4m9183583s
- cpb-aacip-512-4b2x34nt7g
- cpb-aacip-512-3n20c4tr34
- cpb-aacip-512-3f4kk9534t
Loading