Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release candidate for v4.3 #93

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 9 additions & 37 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,32 +24,6 @@
default_model_storage = Path(__file__).parent / 'modeling/models'


def _extract_frames_as_images(video_document, framenums, as_PIL: bool = False):
    """
    Extract the frames at ``framenums`` from a video document in a single pass.

    ``extract_frames_as_images`` in mmif.utils.video_document_helper is using a slower
    iteration over the framenums. This method is a faster alternative, and monkeypatches
    the one in the SDK (see the assignment below).

    :param video_document: MMIF VideoDocument to read frames from
    :param framenums: frame numbers to extract; assumed sorted in ascending
        order, since frames are matched in one sequential pass — TODO confirm
        callers always pass sorted lists
    :param as_PIL: when True, wrap each frame as a ``PIL.Image`` (channel order
        reversed from OpenCV's BGR); otherwise return raw frame arrays
    :return: list of extracted frames, in ``framenums`` order
    """
    if as_PIL:
        from PIL import Image
    frames = []
    video = vdh.capture(video_document)
    # hoisted out of the loop: the frame count is loop-invariant
    total_frames = video_document.get_property(vdh.FRAMECOUNT_DOCPROP_KEY)
    # advance a cursor over framenums instead of popping from the list, so the
    # caller's ``framenums`` argument is no longer mutated as a side effect
    next_idx = 0
    cur_f = 0
    while next_idx < len(framenums) and cur_f <= total_frames:
        ret, frame = video.read()
        if not ret:
            break
        if cur_f == framenums[next_idx]:
            # frame[:, :, ::-1] flips BGR (OpenCV) to RGB for PIL
            frames.append(Image.fromarray(frame[:, :, ::-1]) if as_PIL else frame)
            next_idx += 1
        cur_f += 1
    return frames


vdh.extract_frames_as_images = _extract_frames_as_images


class SwtDetection(ClamsApp):

def __init__(self, preconf_fname: str = None, log_to_file: bool = False) -> None:
Expand All @@ -61,14 +35,11 @@ def __init__(self, preconf_fname: str = None, log_to_file: bool = False) -> None
self.logger.addHandler(fh)

def _appmetadata(self):
    # Intentionally a no-op: the app metadata is provided by ``metadata.py``
    # in this directory instead of being constructed here, so this method
    # must stay as a do-nothing "pass".
    # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._load_appmetadata
    pass

def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
# possible bug here, as the configuration will be updated with parameters that are not defined in the
# app metadata but are passed at run time.
# parameters here are a "refined" dict, so hopefully their values are properly validated and cast at this point.
configs = {**self.preconf, **parameters}
for k, v in configs.items():
self.logger.debug(f"Final Configuraion: {k} :: {v}")
Expand All @@ -83,14 +54,14 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
self.logger.info(f"Initiating classifier with {configs['model_file']}")
if self.logger.isEnabledFor(logging.DEBUG):
configs['logger_name'] = self.logger.name
self.classifier = classify.Classifier(**configs)
self.stitcher = stitch.Stitcher(**configs)
classifier = classify.Classifier(**configs)
stitcher = stitch.Stitcher(**configs)
if self.logger.isEnabledFor(logging.DEBUG):
self.logger.debug(f"Classifier initiation took {time.perf_counter() - t} seconds")

new_view: View = mmif.new_view()
self.sign_view(new_view, parameters)
self.logger.info('Minimum time frame score: %s', self.stitcher.min_timeframe_score)
self.logger.info('Minimum time frame score: %s', stitcher.min_timeframe_score)

vds = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
if not vds:
Expand All @@ -114,7 +85,8 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
extracted = vdh.extract_frames_as_images(vd, sampled, as_PIL=True)

self.logger.debug(f"Seeking time: {time.perf_counter() - t:.2f} seconds\n")
predictions = self.classifier.classify_images(extracted, positions, total_ms)
# the last `total_ms` (as a fixed value) only works since the app is processing only one video at a time
predictions = classifier.classify_images(extracted, positions, total_ms)
if self.logger.isEnabledFor(logging.DEBUG):
self.logger.debug(f"Processing took {time.perf_counter() - t} seconds")

Expand All @@ -135,8 +107,8 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
return mmif

new_view.new_contain(AnnotationTypes.TimeFrame,
document=vd.id, timeUnit='milliseconds', labelset=list(self.stitcher.stitch_label.keys()))
timeframes = self.stitcher.create_timeframes(predictions)
document=vd.id, timeUnit='milliseconds', labelset=list(stitcher.stitch_label.keys()))
timeframes = stitcher.create_timeframes(predictions)
for tf in timeframes:
timeframe_annotation = new_view.new_annotation(AnnotationTypes.TimeFrame)
timeframe_annotation.add_property("label", tf.label),
Expand Down
60 changes: 56 additions & 4 deletions modeling/config/classifier.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
model_file: "modeling/models/20240126-180026.convnext_lg.kfold_000.pt"
model_config_file: "modeling/models/20240126-180026.convnext_lg.kfold_config.yml"
model_file: "modeling/models/20240409-091401.convnext_lg.kfold_013.pt"
model_config_file: "modeling/models/20240409-091401.convnext_lg.kfold_config.yml"

# Milliseconds between sampled frames
sampleRate: 1000
Expand Down Expand Up @@ -90,5 +90,57 @@ postbins:
- "G"
- 'T'
- 'F'


20240409-091401.convnext_lg.kfold_013:
bars:
- "B"
slate:
- "S"
- "S:H"
- "S:C"
- "S:D"
- "S:G"
other_opening:
- "W"
- "L"
- "O"
- "M"
chyron:
- "I"
- "N"
- "Y"
credit:
- "C"
- "R"
other_text:
- "E"
- "K"
- "G"
- 'T'
- 'F'
20240409-093229.convnext_tiny.kfold_012:
bars:
- "B"
slate:
- "S"
- "S:H"
- "S:C"
- "S:D"
- "S:G"
other_opening:
- "W"
- "L"
- "O"
- "M"
chyron:
- "I"
- "N"
- "Y"
credit:
- "C"
- "R"
other_text:
- "E"
- "K"
- "G"
- 'T'
- 'F'
4 changes: 2 additions & 2 deletions modeling/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ def encode_position(self, cur_time, tot_time, img_vec):
pos_vec = torch.tensor([pos]).to(img_vec.dtype)
return torch.concat((img_vec, pos_vec))
elif self.pos_encoder == 'sinusoidal-add':
return torch.add(img_vec, self.pos_vec_lookup[(cur_time / self.pos_unit).round()])
return torch.add(img_vec, self.pos_vec_lookup[round(cur_time / self.pos_unit)])
elif self.pos_encoder == 'sinusoidal-concat':
return torch.concat((img_vec, self.pos_vec_lookup[(cur_time / self.pos_unit).round()]))
return torch.concat((img_vec, self.pos_vec_lookup[round(cur_time / self.pos_unit)]))
else:
return img_vec

Expand Down
24 changes: 24 additions & 0 deletions modeling/models/20240409-091401.convnext_lg.kfold_013.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Model_Name,Label,Accuracy,Precision,Recall,F1-Score
convnext_lg,B,0.9989030957221985,0.976190447807312,0.9534883499145508,0.9647058844566345
convnext_lg,S,0.9996343851089478,1.0,0.8888888955116272,0.9411764740943909
convnext_lg,S:H,1.0,0.0,0.0,0.0
convnext_lg,S:C,1.0,0.0,0.0,0.0
convnext_lg,S:D,1.0,0.0,0.0,0.0
convnext_lg,S:B,1.0,0.0,0.0,0.0
convnext_lg,S:G,1.0,0.0,0.0,0.0
convnext_lg,W,1.0,0.0,0.0,0.0
convnext_lg,L,0.995978057384491,0.7894737124443054,0.6818181872367859,0.7317073345184326
convnext_lg,O,0.9992687106132507,0.75,0.75,0.75
convnext_lg,M,0.9879341721534729,0.23333333432674408,0.4117647111415863,0.2978723347187042
convnext_lg,I,0.9893966913223267,0.9913793206214905,0.8041958212852478,0.8880308866500854
convnext_lg,N,1.0,0.0,0.0,0.0
convnext_lg,E,0.9926874041557312,0.0,0.0,0.0
convnext_lg,P,0.937111496925354,0.91862952709198,0.9884792566299438,0.952275276184082
convnext_lg,Y,1.0,0.0,0.0,0.0
convnext_lg,K,1.0,0.0,0.0,0.0
convnext_lg,G,0.9802559614181519,0.0,0.0,0.0
convnext_lg,T,1.0,0.0,0.0,0.0
convnext_lg,F,0.9934186339378357,0.5666666626930237,0.7727272510528564,0.6538461446762085
convnext_lg,C,0.9897623658180237,0.4285714328289032,0.36000001430511475,0.3913043439388275
convnext_lg,R,0.9992687106132507,0.6666666865348816,0.6666666865348816,0.6666666865348816
convnext_lg,NEG,0.9265082478523254,0.9066901206970215,0.7767722606658936,0.8367181420326233
Binary file not shown.
33 changes: 33 additions & 0 deletions modeling/models/20240409-091401.convnext_lg.kfold_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
num_splits: 20
num_epochs: 10
num_layers: 4
pos_enc_name: sinusoidal-concat
max_input_length: 5640000
pos_unit: 60000
pos_enc_dim: 256
dropouts: 0.1
img_enc_name: convnext_lg
block_guids_train:
- cpb-aacip-254-75r7szdz
block_guids_valid:
- cpb-aacip-254-75r7szdz
- cpb-aacip-259-4j09zf95
- cpb-aacip-526-hd7np1xn78
- cpb-aacip-75-72b8h82x
- cpb-aacip-fe9efa663c6
- cpb-aacip-f5847a01db5
- cpb-aacip-f2a88c88d9d
- cpb-aacip-ec590a6761d
- cpb-aacip-c7c64922fcd
- cpb-aacip-f3fa7215348
- cpb-aacip-f13ae523e20
- cpb-aacip-e7a25f07d35
- cpb-aacip-ce6d5e4bd7f
- cpb-aacip-690722078b2
- cpb-aacip-e649135e6ec
- cpb-aacip-15-93gxdjk6
- cpb-aacip-512-4f1mg7h078
- cpb-aacip-512-4m9183583s
- cpb-aacip-512-4b2x34nt7g
- cpb-aacip-512-3n20c4tr34
- cpb-aacip-512-3f4kk9534t
24 changes: 24 additions & 0 deletions modeling/models/20240409-093229.convnext_tiny.kfold_012.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Model_Name,Label,Accuracy,Precision,Recall,F1-Score
convnext_tiny,B,1.0,1.0,1.0,1.0
convnext_tiny,S,1.0,1.0,1.0,1.0
convnext_tiny,S:H,1.0,0.0,0.0,0.0
convnext_tiny,S:C,1.0,0.0,0.0,0.0
convnext_tiny,S:D,1.0,0.0,0.0,0.0
convnext_tiny,S:B,1.0,0.0,0.0,0.0
convnext_tiny,S:G,1.0,0.0,0.0,0.0
convnext_tiny,W,1.0,0.0,0.0,0.0
convnext_tiny,L,0.9955801367759705,0.6666666865348816,0.8571428656578064,0.75
convnext_tiny,O,1.0,0.0,0.0,0.0
convnext_tiny,M,0.9933701753616333,0.4000000059604645,0.4000000059604645,0.4000000059604645
convnext_tiny,I,0.9955801367759705,1.0,0.692307710647583,0.8181818127632141
convnext_tiny,N,1.0,0.0,0.0,0.0
convnext_tiny,E,1.0,0.0,0.0,0.0
convnext_tiny,P,0.8972375392913818,0.8815286755561829,1.0,0.937034547328949
convnext_tiny,Y,1.0,0.0,0.0,0.0
convnext_tiny,K,1.0,0.0,0.0,0.0
convnext_tiny,G,0.9988950490951538,0.0,0.0,0.0
convnext_tiny,T,1.0,0.0,0.0,0.0
convnext_tiny,F,1.0,0.0,0.0,0.0
convnext_tiny,C,0.9977900385856628,0.8333333134651184,1.0,0.9090909361839294
convnext_tiny,R,0.9955801367759705,0.0,0.0,0.0
convnext_tiny,NEG,0.8983425498008728,0.9803921580314636,0.3546099364757538,0.5208333134651184
Binary file not shown.
33 changes: 33 additions & 0 deletions modeling/models/20240409-093229.convnext_tiny.kfold_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
num_splits: 20
num_epochs: 10
num_layers: 4
pos_enc_name: sinusoidal-concat
max_input_length: 5640000
pos_unit: 60000
pos_enc_dim: 256
dropouts: 0.1
img_enc_name: convnext_tiny
block_guids_train:
- cpb-aacip-254-75r7szdz
block_guids_valid:
- cpb-aacip-254-75r7szdz
- cpb-aacip-259-4j09zf95
- cpb-aacip-526-hd7np1xn78
- cpb-aacip-75-72b8h82x
- cpb-aacip-fe9efa663c6
- cpb-aacip-f5847a01db5
- cpb-aacip-f2a88c88d9d
- cpb-aacip-ec590a6761d
- cpb-aacip-c7c64922fcd
- cpb-aacip-f3fa7215348
- cpb-aacip-f13ae523e20
- cpb-aacip-e7a25f07d35
- cpb-aacip-ce6d5e4bd7f
- cpb-aacip-690722078b2
- cpb-aacip-e649135e6ec
- cpb-aacip-15-93gxdjk6
- cpb-aacip-512-4f1mg7h078
- cpb-aacip-512-4m9183583s
- cpb-aacip-512-4b2x34nt7g
- cpb-aacip-512-3n20c4tr34
- cpb-aacip-512-3f4kk9534t
Loading