diff --git a/app.py b/app.py
index 98f07ec..1cb4cba 100644
--- a/app.py
+++ b/app.py
@@ -24,32 +24,6 @@ default_model_storage = Path(__file__).parent / 'modeling/models'
 
 
-def _extract_frames_as_images(video_document, framenums, as_PIL: bool = False):
-    """
-    ``extract_frames_as_images`` in mmif.utils.video_document_helper is using a slower
-    iteration over the framenums. This method is a faster alternative, and monkeypatches the one in the SDK.
-    """
-    if as_PIL:
-        from PIL import Image
-    frames = []
-    video = vdh.capture(video_document)
-    cur_f = 0
-    while True:
-        if not framenums or cur_f > video_document.get_property(vdh.FRAMECOUNT_DOCPROP_KEY):
-            break
-        ret, frame = video.read()
-        if not ret:
-            break
-        if cur_f == framenums[0]:
-            frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
-            framenums.pop(0)
-        cur_f += 1
-    return frames
-
-
-vdh.extract_frames_as_images = _extract_frames_as_images
-
-
 class SwtDetection(ClamsApp):
 
     def __init__(self, preconf_fname: str = None, log_to_file: bool = False) -> None:
 
@@ -61,14 +35,11 @@ def __init__(self, preconf_fname: str = None, log_to_file: bool = False) -> None
             self.logger.addHandler(fh)
 
     def _appmetadata(self):
-        # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._load_appmetadata
-        # Also check out ``metadata.py`` in this directory.
-        # When using the ``metadata.py`` leave this do-nothing "pass" method here.
+        # using metadata.py
         pass
 
     def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
-        # possible bug here, as the configuration will be updated with the parameters that's not defined in the
-        # app metadata, but passed at the run time.
+        # `parameters` here is the "refined" dict, so hopefully its values are properly validated and cast at this point
        configs = {**self.preconf, **parameters}
         for k, v in configs.items():
             self.logger.debug(f"Final Configuraion: {k} :: {v}")
@@ -83,14 +54,14 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
         self.logger.info(f"Initiating classifier with {configs['model_file']}")
         if self.logger.isEnabledFor(logging.DEBUG):
             configs['logger_name'] = self.logger.name
-        self.classifier = classify.Classifier(**configs)
-        self.stitcher = stitch.Stitcher(**configs)
+        classifier = classify.Classifier(**configs)
+        stitcher = stitch.Stitcher(**configs)
         if self.logger.isEnabledFor(logging.DEBUG):
             self.logger.debug(f"Classifier initiation took {time.perf_counter() - t} seconds")
 
         new_view: View = mmif.new_view()
         self.sign_view(new_view, parameters)
-        self.logger.info('Minimum time frame score: %s', self.stitcher.min_timeframe_score)
+        self.logger.info('Minimum time frame score: %s', stitcher.min_timeframe_score)
 
         vds = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
         if not vds:
@@ -114,7 +85,8 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
             extracted = vdh.extract_frames_as_images(vd, sampled, as_PIL=True)
             self.logger.debug(f"Seeking time: {time.perf_counter() - t:.2f} seconds\n")
 
-        predictions = self.classifier.classify_images(extracted, positions, total_ms)
+        # passing `total_ms` as a fixed value only works because the app processes one video at a time
+        predictions = classifier.classify_images(extracted, positions, total_ms)
         if self.logger.isEnabledFor(logging.DEBUG):
             self.logger.debug(f"Processing took {time.perf_counter() - t} seconds")
 
@@ -135,8 +107,8 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
             return mmif
 
         new_view.new_contain(AnnotationTypes.TimeFrame,
-                             document=vd.id, timeUnit='milliseconds', labelset=list(self.stitcher.stitch_label.keys()))
-        timeframes = self.stitcher.create_timeframes(predictions)
+                             document=vd.id, timeUnit='milliseconds', labelset=list(stitcher.stitch_label.keys()))
+        timeframes = stitcher.create_timeframes(predictions)
         for tf in timeframes:
             timeframe_annotation = new_view.new_annotation(AnnotationTypes.TimeFrame)
             timeframe_annotation.add_property("label", tf.label),
diff --git a/modeling/config/classifier.yml b/modeling/config/classifier.yml
index 2575e08..9a4f43a 100644
--- a/modeling/config/classifier.yml
+++ b/modeling/config/classifier.yml
@@ -1,5 +1,5 @@
-model_file: "modeling/models/20240126-180026.convnext_lg.kfold_000.pt"
-model_config_file: "modeling/models/20240126-180026.convnext_lg.kfold_config.yml"
+model_file: "modeling/models/20240409-091401.convnext_lg.kfold_013.pt"
+model_config_file: "modeling/models/20240409-091401.convnext_lg.kfold_config.yml"
 
 # Milliseconds between sampled frames
 sampleRate: 1000
@@ -90,5 +90,57 @@ postbins:
       - "G"
       - 'T'
       - 'F'
-
-
\ No newline at end of file
+  20240409-091401.convnext_lg.kfold_013:
+    bars:
+      - "B"
+    slate:
+      - "S"
+      - "S:H"
+      - "S:C"
+      - "S:D"
+      - "S:G"
+    other_opening:
+      - "W"
+      - "L"
+      - "O"
+      - "M"
+    chyron:
+      - "I"
+      - "N"
+      - "Y"
+    credit:
+      - "C"
+      - "R"
+    other_text:
+      - "E"
+      - "K"
+      - "G"
+      - 'T'
+      - 'F'
+  20240409-093229.convnext_tiny.kfold_012:
+    bars:
+      - "B"
+    slate:
+      - "S"
+      - "S:H"
+      - "S:C"
+      - "S:D"
+      - "S:G"
+    other_opening:
+      - "W"
+      - "L"
+      - "O"
+      - "M"
+    chyron:
+      - "I"
+      - "N"
+      - "Y"
+    credit:
+      - "C"
+      - "R"
+    other_text:
+      - "E"
+      - "K"
+      - "G"
+      - 'T'
+      - 'F'
diff --git a/modeling/data_loader.py b/modeling/data_loader.py
index b396b88..10c6207 100644
--- a/modeling/data_loader.py
+++ b/modeling/data_loader.py
@@ -125,9 +125,9 @@ def encode_position(self, cur_time, tot_time, img_vec):
             pos_vec = torch.tensor([pos]).to(img_vec.dtype)
             return torch.concat((img_vec, pos_vec))
         elif self.pos_encoder == 'sinusoidal-add':
-            return torch.add(img_vec, self.pos_vec_lookup[(cur_time / self.pos_unit).round()])
+            return torch.add(img_vec, self.pos_vec_lookup[round(cur_time / self.pos_unit)])
         elif self.pos_encoder == 'sinusoidal-concat':
-            return torch.concat((img_vec, self.pos_vec_lookup[(cur_time / self.pos_unit).round()]))
+            return torch.concat((img_vec, self.pos_vec_lookup[round(cur_time / self.pos_unit)]))
         else:
             return img_vec
 
diff --git a/modeling/models/20240409-091401.convnext_lg.kfold_013.csv b/modeling/models/20240409-091401.convnext_lg.kfold_013.csv
new file mode 100644
index 0000000..6f0e497
--- /dev/null
+++ b/modeling/models/20240409-091401.convnext_lg.kfold_013.csv
@@ -0,0 +1,24 @@
+Model_Name,Label,Accuracy,Precision,Recall,F1-Score
+convnext_lg,B,0.9989030957221985,0.976190447807312,0.9534883499145508,0.9647058844566345
+convnext_lg,S,0.9996343851089478,1.0,0.8888888955116272,0.9411764740943909
+convnext_lg,S:H,1.0,0.0,0.0,0.0
+convnext_lg,S:C,1.0,0.0,0.0,0.0
+convnext_lg,S:D,1.0,0.0,0.0,0.0
+convnext_lg,S:B,1.0,0.0,0.0,0.0
+convnext_lg,S:G,1.0,0.0,0.0,0.0
+convnext_lg,W,1.0,0.0,0.0,0.0
+convnext_lg,L,0.995978057384491,0.7894737124443054,0.6818181872367859,0.7317073345184326
+convnext_lg,O,0.9992687106132507,0.75,0.75,0.75
+convnext_lg,M,0.9879341721534729,0.23333333432674408,0.4117647111415863,0.2978723347187042
+convnext_lg,I,0.9893966913223267,0.9913793206214905,0.8041958212852478,0.8880308866500854
+convnext_lg,N,1.0,0.0,0.0,0.0
+convnext_lg,E,0.9926874041557312,0.0,0.0,0.0
+convnext_lg,P,0.937111496925354,0.91862952709198,0.9884792566299438,0.952275276184082
+convnext_lg,Y,1.0,0.0,0.0,0.0
+convnext_lg,K,1.0,0.0,0.0,0.0
+convnext_lg,G,0.9802559614181519,0.0,0.0,0.0
+convnext_lg,T,1.0,0.0,0.0,0.0
+convnext_lg,F,0.9934186339378357,0.5666666626930237,0.7727272510528564,0.6538461446762085
+convnext_lg,C,0.9897623658180237,0.4285714328289032,0.36000001430511475,0.3913043439388275
+convnext_lg,R,0.9992687106132507,0.6666666865348816,0.6666666865348816,0.6666666865348816
+convnext_lg,NEG,0.9265082478523254,0.9066901206970215,0.7767722606658936,0.8367181420326233
diff --git a/modeling/models/20240409-091401.convnext_lg.kfold_013.pt b/modeling/models/20240409-091401.convnext_lg.kfold_013.pt
new file mode 100644
index 0000000..f72aad0
Binary files /dev/null and b/modeling/models/20240409-091401.convnext_lg.kfold_013.pt differ
diff --git a/modeling/models/20240409-091401.convnext_lg.kfold_config.yml b/modeling/models/20240409-091401.convnext_lg.kfold_config.yml
new file mode 100644
index 0000000..eab5586
--- /dev/null
+++ b/modeling/models/20240409-091401.convnext_lg.kfold_config.yml
@@ -0,0 +1,33 @@
+num_splits: 20
+num_epochs: 10
+num_layers: 4
+pos_enc_name: sinusoidal-concat
+max_input_length: 5640000
+pos_unit: 60000
+pos_enc_dim: 256
+dropouts: 0.1
+img_enc_name: convnext_lg
+block_guids_train:
+- cpb-aacip-254-75r7szdz
+block_guids_valid:
+- cpb-aacip-254-75r7szdz
+- cpb-aacip-259-4j09zf95
+- cpb-aacip-526-hd7np1xn78
+- cpb-aacip-75-72b8h82x
+- cpb-aacip-fe9efa663c6
+- cpb-aacip-f5847a01db5
+- cpb-aacip-f2a88c88d9d
+- cpb-aacip-ec590a6761d
+- cpb-aacip-c7c64922fcd
+- cpb-aacip-f3fa7215348
+- cpb-aacip-f13ae523e20
+- cpb-aacip-e7a25f07d35
+- cpb-aacip-ce6d5e4bd7f
+- cpb-aacip-690722078b2
+- cpb-aacip-e649135e6ec
+- cpb-aacip-15-93gxdjk6
+- cpb-aacip-512-4f1mg7h078
+- cpb-aacip-512-4m9183583s
+- cpb-aacip-512-4b2x34nt7g
+- cpb-aacip-512-3n20c4tr34
+- cpb-aacip-512-3f4kk9534t
diff --git a/modeling/models/20240409-093229.convnext_tiny.kfold_012.csv b/modeling/models/20240409-093229.convnext_tiny.kfold_012.csv
new file mode 100644
index 0000000..2d491ad
--- /dev/null
+++ b/modeling/models/20240409-093229.convnext_tiny.kfold_012.csv
@@ -0,0 +1,24 @@
+Model_Name,Label,Accuracy,Precision,Recall,F1-Score
+convnext_tiny,B,1.0,1.0,1.0,1.0
+convnext_tiny,S,1.0,1.0,1.0,1.0
+convnext_tiny,S:H,1.0,0.0,0.0,0.0
+convnext_tiny,S:C,1.0,0.0,0.0,0.0
+convnext_tiny,S:D,1.0,0.0,0.0,0.0
+convnext_tiny,S:B,1.0,0.0,0.0,0.0
+convnext_tiny,S:G,1.0,0.0,0.0,0.0
+convnext_tiny,W,1.0,0.0,0.0,0.0
+convnext_tiny,L,0.9955801367759705,0.6666666865348816,0.8571428656578064,0.75
+convnext_tiny,O,1.0,0.0,0.0,0.0
+convnext_tiny,M,0.9933701753616333,0.4000000059604645,0.4000000059604645,0.4000000059604645
+convnext_tiny,I,0.9955801367759705,1.0,0.692307710647583,0.8181818127632141
+convnext_tiny,N,1.0,0.0,0.0,0.0
+convnext_tiny,E,1.0,0.0,0.0,0.0
+convnext_tiny,P,0.8972375392913818,0.8815286755561829,1.0,0.937034547328949
+convnext_tiny,Y,1.0,0.0,0.0,0.0
+convnext_tiny,K,1.0,0.0,0.0,0.0
+convnext_tiny,G,0.9988950490951538,0.0,0.0,0.0
+convnext_tiny,T,1.0,0.0,0.0,0.0
+convnext_tiny,F,1.0,0.0,0.0,0.0
+convnext_tiny,C,0.9977900385856628,0.8333333134651184,1.0,0.9090909361839294
+convnext_tiny,R,0.9955801367759705,0.0,0.0,0.0
+convnext_tiny,NEG,0.8983425498008728,0.9803921580314636,0.3546099364757538,0.5208333134651184
diff --git a/modeling/models/20240409-093229.convnext_tiny.kfold_012.pt b/modeling/models/20240409-093229.convnext_tiny.kfold_012.pt
new file mode 100644
index 0000000..e95a480
Binary files /dev/null and b/modeling/models/20240409-093229.convnext_tiny.kfold_012.pt differ
diff --git a/modeling/models/20240409-093229.convnext_tiny.kfold_config.yml b/modeling/models/20240409-093229.convnext_tiny.kfold_config.yml
new file mode 100644
index 0000000..a75378c
--- /dev/null
+++ b/modeling/models/20240409-093229.convnext_tiny.kfold_config.yml
@@ -0,0 +1,33 @@
+num_splits: 20
+num_epochs: 10
+num_layers: 4
+pos_enc_name: sinusoidal-concat
+max_input_length: 5640000
+pos_unit: 60000
+pos_enc_dim: 256
+dropouts: 0.1
+img_enc_name: convnext_tiny
+block_guids_train:
+- cpb-aacip-254-75r7szdz
+block_guids_valid:
+- cpb-aacip-254-75r7szdz
+- cpb-aacip-259-4j09zf95
+- cpb-aacip-526-hd7np1xn78
+- cpb-aacip-75-72b8h82x
+- cpb-aacip-fe9efa663c6
+- cpb-aacip-f5847a01db5
+- cpb-aacip-f2a88c88d9d
+- cpb-aacip-ec590a6761d
+- cpb-aacip-c7c64922fcd
+- cpb-aacip-f3fa7215348
+- cpb-aacip-f13ae523e20
+- cpb-aacip-e7a25f07d35
+- cpb-aacip-ce6d5e4bd7f
+- cpb-aacip-690722078b2
+- cpb-aacip-e649135e6ec
+- cpb-aacip-15-93gxdjk6
+- cpb-aacip-512-4f1mg7h078
+- cpb-aacip-512-4m9183583s
+- cpb-aacip-512-4b2x34nt7g
+- cpb-aacip-512-3n20c4tr34
+- cpb-aacip-512-3f4kk9534t
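Note on the encode_position() change in modeling/data_loader.py above: `(cur_time / self.pos_unit).round()` is a tensor method, so it breaks with an AttributeError when `cur_time` arrives as a plain Python number, which is presumably why the patch switches to the builtin round(). The sketch below is a minimal illustration of the kind of precomputed sinusoidal table that `self.pos_vec_lookup` indexes into; `build_pos_vec_lookup` is a hypothetical stand-in, not this repo's code, with `max_input_length`, `pos_unit`, and `pos_enc_dim` mirroring the kfold_config.yml values in this diff.

    # Minimal sketch of a sinusoidal position-vector lookup table, assuming the
    # standard transformer-style sin/cos formulation; names here are illustrative.
    import math
    import torch

    def build_pos_vec_lookup(max_input_length=5640000, pos_unit=60000, pos_enc_dim=256):
        # one table row per `pos_unit` of video time (here: one row per minute)
        num_positions = max_input_length // pos_unit + 1
        lookup = torch.zeros(num_positions, pos_enc_dim)
        position = torch.arange(num_positions).unsqueeze(1)      # (N, 1)
        div_term = torch.exp(torch.arange(0, pos_enc_dim, 2) * (-math.log(10000.0) / pos_enc_dim))
        lookup[:, 0::2] = torch.sin(position * div_term)         # even dimensions
        lookup[:, 1::2] = torch.cos(position * div_term)         # odd dimensions
        return lookup

    pos_vec_lookup = build_pos_vec_lookup()
    img_vec = torch.randn(768)           # stand-in for a ConvNeXt feature vector
    cur_time, pos_unit = 90000, 60000    # 90 seconds into the video, 1-minute units

    # `cur_time` is a plain int here, so the builtin round() is required;
    # the pre-fix tensor method `.round()` would raise AttributeError.
    pos_vec = pos_vec_lookup[round(cur_time / pos_unit)]
    combined = torch.concat((img_vec, pos_vec))   # 'sinusoidal-concat' -> 1024-dim

With pos_unit: 60000, each row of the table corresponds to one minute of video time, so the 90-second timestamp above rounds to row 2; 'sinusoidal-add' would instead add the 256-dim position vector to a matching slice of the image vector rather than concatenating it.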