Merge pull request #66 from clamsproject/develop
Releasing v3.0
marcverhagen authored Feb 7, 2024
2 parents 29b0ca9 + 4fa13a9 commit e399e92
Showing 39 changed files with 19,562 additions and 223 deletions.
6 changes: 3 additions & 3 deletions Containerfile
@@ -1,4 +1,4 @@
FROM ghcr.io/clamsproject/clams-python-opencv4:1.0.9
FROM ghcr.io/clamsproject/clams-python-opencv4-torch2:1.1.0

# See https://github.com/orgs/clamsproject/packages?tab=packages&q=clams-python for more base images
# IF you want to automatically publish this image to the clamsproject organization,
@@ -24,8 +24,8 @@ ENV CLAMS_APP_VERSION ${CLAMS_APP_VERSION}

WORKDIR /app

COPY requirements-app.txt .
RUN pip install --no-cache-dir -r /app/requirements-app.txt
COPY requirements.txt .
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY . .
RUN python /app/dl_backbone.py
87 changes: 34 additions & 53 deletions README.md
@@ -40,61 +40,42 @@ Using the app to process a MMIF file:
curl -X POST [email protected] http://localhost:5000/
```

This may take a while depending on the size of the video file embedded in the MMIF file. It should return a MMIF object with timeframes added, for example
This may take a while depending on the size of the video file embedded in the MMIF file. It should return a MMIF object with TimeFrame and TimePoint annotations added.


### Output details

A TimeFrame looks as follows (the scores are somewhat condensed for clarity):

```json
{
"metadata": {
"mmif": "http://mmif.clams.ai/0.4.0"
},
"documents": [
{
"@type": "http://mmif.clams.ai/0.4.0/vocabulary/VideoDocument",
"properties": {
"mime": "video/mpeg",
"id": "m1",
"location": "file:///data/video/cpb-aacip-690722078b2-shrunk.mp4"
}
}
],
"views": [
{
"id": "v_0",
"metadata": {
"timestamp": "2023-11-06T20:00:18.311889",
"app": "http://apps.clams.ai/swt-detection",
"contains": {
"http://mmif.clams.ai/vocabulary/TimeFrame/v1": {
"document": "m1"
}
},
"parameters": {
"pretty": "True"
}
},
"annotations": [
{
"@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v1",
"properties": {
"start": 30000,
"end": 40000,
"frameType": "slate",
"score": 3.909090909090909,
"id": "tf_1"
}
},
{
"@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v1",
"properties": {
"start": 56000,
"end": 58000,
"frameType": "slate",
"score": 1.3333333333333333,
"id": "tf_2"
}
}
]
}
]
"@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v1",
"properties": {
"frameType": "bars",
"score": 0.9999,
"scores": [0.9998, 0.9999, 0.9998, 0.9999, 0.9999],
"targets": ["tp_1", "tp_2", "tp_3", "tp_4", "tp_5"],
"representatives": ["tp_2"],
"id": "tf_1"
}
}
```

The `targets` property contains the identifiers of the TimePoints included in the TimeFrame. The `scores` property lists the per-TimePoint scores for the "bars" frame type, `score` is the average of those scores over the entire TimeFrame, and `representatives` points to the TimePoints that are considered representative of the TimeFrame.
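
As a quick sanity check on how these properties relate, the sketch below parses the TimeFrame shown above using only the Python standard library and recomputes the frame-level `score` as the mean of the per-TimePoint `scores`. This is a minimal illustration with the condensed values from the example, not part of the app itself:

```python
import json
from statistics import mean

# The TimeFrame snippet from above (scores condensed, as in this README).
timeframe = json.loads("""
{
  "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v1",
  "properties": {
    "frameType": "bars",
    "score": 0.9999,
    "scores": [0.9998, 0.9999, 0.9998, 0.9999, 0.9999],
    "targets": ["tp_1", "tp_2", "tp_3", "tp_4", "tp_5"],
    "representatives": ["tp_2"],
    "id": "tf_1"
  }
}
""")

props = timeframe["properties"]
# "score" is the average of the per-TimePoint "scores" for this frame type.
print(props["frameType"], round(mean(props["scores"]), 4))   # bars 0.9999
# "targets" lists the TimePoints covered; "representatives" points into that list.
print(len(props["targets"]), "TimePoints, representative:", props["representatives"])
```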

Only TimePoints that are included in a TimeFrame appear in the MMIF output. Here is one, heavily condensed for clarity and showing only four of the labels:

```json
{
  "@type": "http://mmif.clams.ai/vocabulary/TimePoint/v1",
  "properties": {
    "timePoint": 0,
    "label": "B",
    "labels": ["B", "S", "S:H", "S:C"],
    "scores": [0.9998, 5.7532e-08, 2.4712e-13, 1.9209e-12],
    "id": "tp_1"
  }
}
```

The `label` property holds the raw label for the TimePoint, which may differ from the `frameType` of the enclosing TimeFrame because a TimeFrame typically groups several raw labels together. The `labels` property lists all labels for the TimePoint, and `scores` gives the classifier score for each of those labels.
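
The relation between `label`, `labels`, and `scores` mirrors the small `_label_with_highest_score` helper in `app.py`: the raw label is simply the entry of `labels` whose classifier score is highest. A minimal sketch using the values from the example above:

```python
# Values copied from the condensed TimePoint example above.
labels = ["B", "S", "S:H", "S:C"]
scores = [0.9998, 5.7532e-08, 2.4712e-13, 1.9209e-12]

# Pick the label paired with the highest score (same idea as
# _label_with_highest_score() in app.py).
label = max(zip(scores, labels))[1]
print(label)  # prints: B
```
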
61 changes: 46 additions & 15 deletions app.py
@@ -37,11 +37,10 @@ def _appmetadata(self):
        pass

    def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
        # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._annotate

        parameters = self.get_configuration(**parameters)
        new_view: View = mmif.new_view()
        self.sign_view(new_view, parameters)
        self._export_parameters(parameters)

        vds = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
        if not vds:
@@ -50,8 +49,46 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
            return mmif
        vd = vds[0]

        predictions = self.classifier.process_video(vd.location_path(nonexist_ok=False))
        timeframes = self.stitcher.create_timeframes(predictions)

        new_view.new_contain(
            AnnotationTypes.TimeFrame, document=vd.id, timeUnit='milliseconds')
        new_view.new_contain(
            AnnotationTypes.TimePoint, document=vd.id, timeUnit='milliseconds')

        for tf in timeframes:
            timeframe_annotation = new_view.new_annotation(AnnotationTypes.TimeFrame)
            timeframe_annotation.add_property("frameType", tf.label)
            timeframe_annotation.add_property("score", tf.score)
            timeframe_annotation.add_property("scores", tf.scores)
            timepoint_annotations = []
            for prediction in tf.targets:
                timepoint_annotation = new_view.new_annotation(AnnotationTypes.TimePoint)
                prediction.annotation = timepoint_annotation
                scores = [prediction.score_for_label(lbl) for lbl in prediction.labels]
                label = self._label_with_highest_score(prediction.labels, scores)
                timepoint_annotation.add_property('timePoint', prediction.timepoint)
                timepoint_annotation.add_property('label', label)
                timepoint_annotation.add_property('labels', prediction.labels)
                timepoint_annotation.add_property('scores', scores)
                timepoint_annotations.append(timepoint_annotation)
            timeframe_annotation.add_property(
                'targets', [tp.id for tp in timepoint_annotations])
            reps = [p.annotation.id for p in tf.representative_predictions()]
            timeframe_annotation.add_property("representatives", reps)
            # print(timeframe_annotation.serialize(pretty=True))

        return mmif

    def _export_parameters(self, parameters: dict):
        """Export the parameters to the Classifier and Stitcher instances."""
        for parameter, value in parameters.items():
            if parameter == "sampleRate":
            if parameter == "startAt":
                self.classifier.start_at = value
            elif parameter == "stopAt":
                self.classifier.stop_at = value
            elif parameter == "sampleRate":
                self.classifier.sample_rate = value
                self.stitcher.sample_rate = value
            elif parameter == "minFrameScore":
@@ -61,19 +98,13 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
elif parameter == "minFrameCount":
self.stitcher.min_frame_count = value

predictions = self.classifier.process_video(vd.location)
timeframes = self.stitcher.create_timeframes(predictions)
def _label_with_highest_score(self, labels: list, scores: list) -> str:
"""Return the label associated with the highest scores. The score for
labels[i] is scores[i]."""
# TODO: now the NEG scores are included, perhaps not do that
sorted_scores = list(sorted(zip(scores, labels), reverse=True))
return sorted_scores[0][1]

        new_view.new_contain(
            AnnotationTypes.TimeFrame, document=vd.id, timeUnit='milliseconds')
        for tf in timeframes:
            timeframe_annotation = new_view.new_annotation(AnnotationTypes.TimeFrame)
            timeframe_annotation.add_property("start", tf.start)
            timeframe_annotation.add_property("end", tf.end)
            timeframe_annotation.add_property("frameType", tf.label),
            timeframe_annotation.add_property("score", tf.score)

        return mmif


if __name__ == "__main__":
16 changes: 14 additions & 2 deletions metadata.py
@@ -28,11 +28,23 @@ def appmetadata() -> AppMetadata:
    )

    metadata.add_input(DocumentTypes.VideoDocument, required=True)
    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds')

    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds', frameType='bars')
    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds', frameType='slate')
    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds', frameType='chyron')
    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds', frameType='credits')
    metadata.add_output(AnnotationTypes.TimePoint, timeUnit='milliseconds')

    # TODO: defaults are the same as in modeling/config/classifier.yml, which is possibly
    # not a great idea, should perhaps read defaults from the configuration file. There is
    # also a movement afoot to get rid of the configuration file.
    metadata.add_parameter(
        name='startAt', type='integer', default=0,
        description='Number of milliseconds into the video to start processing')
    metadata.add_parameter(
        # 10M ms is almost 3 hours, that should do; this is better than sys.maxint
        # (also, I tried using default=None, but that made stopAt a required property)
        name='stopAt', type='integer', default=10000000,
        description='Number of milliseconds into the video to stop processing')
    metadata.add_parameter(
        name='sampleRate', type='integer', default=1000,
        description='Milliseconds between sampled frames')