Merge pull request #66 from clamsproject/develop
Releasing v3.0
marcverhagen authored Feb 7, 2024
2 parents 29b0ca9 + 4fa13a9 commit e399e92
Showing 39 changed files with 19,562 additions and 223 deletions.
6 changes: 3 additions & 3 deletions Containerfile
@@ -1,4 +1,4 @@
FROM ghcr.io/clamsproject/clams-python-opencv4:1.0.9
FROM ghcr.io/clamsproject/clams-python-opencv4-torch2:1.1.0

# See https://github.com/orgs/clamsproject/packages?tab=packages&q=clams-python for more base images
# IF you want to automatically publish this image to the clamsproject organization,
@@ -24,8 +24,8 @@ ENV CLAMS_APP_VERSION ${CLAMS_APP_VERSION}

WORKDIR /app

COPY requirements-app.txt .
RUN pip install --no-cache-dir -r /app/requirements-app.txt
COPY requirements.txt .
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY . .
RUN python /app/dl_backbone.py
87 changes: 34 additions & 53 deletions README.md
@@ -40,61 +40,42 @@ Using the app to process a MMIF file:
curl -X POST [email protected] http://localhost:5000/
```

This may take a while depending on the size of the video file embedded in the MMIF file. It should return a MMIF object with timeframes added, for example
This may take a while depending on the size of the video file embedded in the MMIF file. It should return a MMIF object with TimeFrame and TimePoint annotations added.


### Output details

A TimeFrame looks as follows (the scores are somewhat condensed for clarity):

```json
{
"metadata": {
"mmif": "http://mmif.clams.ai/0.4.0"
},
"documents": [
{
"@type": "http://mmif.clams.ai/0.4.0/vocabulary/VideoDocument",
"properties": {
"mime": "video/mpeg",
"id": "m1",
"location": "file:///data/video/cpb-aacip-690722078b2-shrunk.mp4"
}
}
],
"views": [
{
"id": "v_0",
"metadata": {
"timestamp": "2023-11-06T20:00:18.311889",
"app": "http://apps.clams.ai/swt-detection",
"contains": {
"http://mmif.clams.ai/vocabulary/TimeFrame/v1": {
"document": "m1"
}
},
"parameters": {
"pretty": "True"
}
},
"annotations": [
{
"@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v1",
"properties": {
"start": 30000,
"end": 40000,
"frameType": "slate",
"score": 3.909090909090909,
"id": "tf_1"
}
},
{
"@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v1",
"properties": {
"start": 56000,
"end": 58000,
"frameType": "slate",
"score": 1.3333333333333333,
"id": "tf_2"
}
}
]
}
]
"@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v1",
"properties": {
"frameType": "bars",
"score": 0.9999,
"scores": [0.9998, 0.9999, 0.9998, 0.9999, 0.9999],
"targets": ["tp_1", "tp_2", "tp_3", "tp_4", "tp_5"],
"representatives": ["tp_2"],
"id": "tf_1"
}
}
```

The `targets` property contains the identifiers of the TimePoints included in the TimeFrame. The `scores` property lists the per-TimePoint scores for the "bars" frame type, `score` is the average of those scores over the entire TimeFrame, and `representatives` points to the TimePoints that are considered representative of the TimeFrame.
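
As a quick sanity check on how these properties relate, the sketch below parses the TimeFrame shown above using only the Python standard library and recomputes the frame-level `score` as the mean of the per-TimePoint `scores`. This is a minimal illustration with the condensed values from the example, not part of the app itself:

```python
import json
from statistics import mean

# The TimeFrame snippet from above (scores condensed, as in this README).
timeframe = json.loads("""
{
  "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v1",
  "properties": {
    "frameType": "bars",
    "score": 0.9999,
    "scores": [0.9998, 0.9999, 0.9998, 0.9999, 0.9999],
    "targets": ["tp_1", "tp_2", "tp_3", "tp_4", "tp_5"],
    "representatives": ["tp_2"],
    "id": "tf_1"
  }
}
""")

props = timeframe["properties"]
# "score" is the average of the per-TimePoint "scores" for this frame type.
print(props["frameType"], round(mean(props["scores"]), 4))   # bars 0.9999
# "targets" lists the TimePoints covered; "representatives" points into that list.
print(len(props["targets"]), "TimePoints, representative:", props["representatives"])
```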

Only TimePoints that are included in a TimeFrame appear in the MMIF output. Here is one, heavily condensed for clarity and showing only four of the labels:

```json
{
  "@type": "http://mmif.clams.ai/vocabulary/TimePoint/v1",
  "properties": {
    "timePoint": 0,
    "label": "B",
    "labels": ["B", "S", "S:H", "S:C"],
    "scores": [0.9998, 5.7532e-08, 2.4712e-13, 1.9209e-12],
    "id": "tp_1"
  }
}
```

The `label` property holds the raw label for the TimePoint, which may differ from the `frameType` of the enclosing TimeFrame because a TimeFrame typically groups several raw labels together. The `labels` property lists all labels for the TimePoint, and `scores` gives the classifier score for each of those labels.
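
The relation between `label`, `labels`, and `scores` mirrors the small `_label_with_highest_score` helper in `app.py`: the raw label is simply the entry of `labels` whose classifier score is highest. A minimal sketch using the values from the example above:

```python
# Values copied from the condensed TimePoint example above.
labels = ["B", "S", "S:H", "S:C"]
scores = [0.9998, 5.7532e-08, 2.4712e-13, 1.9209e-12]

# Pick the label paired with the highest score (same idea as
# _label_with_highest_score() in app.py).
label = max(zip(scores, labels))[1]
print(label)  # prints: B
```
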
61 changes: 46 additions & 15 deletions app.py
@@ -37,11 +37,10 @@ def _appmetadata(self):
        pass

    def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
        # see https://sdk.clams.ai/autodoc/clams.app.html#clams.app.ClamsApp._annotate

        parameters = self.get_configuration(**parameters)
        new_view: View = mmif.new_view()
        self.sign_view(new_view, parameters)
        self._export_parameters(parameters)

        vds = mmif.get_documents_by_type(DocumentTypes.VideoDocument)
        if not vds:
@@ -50,8 +49,46 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
            return mmif
        vd = vds[0]

        predictions = self.classifier.process_video(vd.location_path(nonexist_ok=False))
        timeframes = self.stitcher.create_timeframes(predictions)

        new_view.new_contain(
            AnnotationTypes.TimeFrame, document=vd.id, timeUnit='milliseconds')
        new_view.new_contain(
            AnnotationTypes.TimePoint, document=vd.id, timeUnit='milliseconds')

        for tf in timeframes:
            timeframe_annotation = new_view.new_annotation(AnnotationTypes.TimeFrame)
            timeframe_annotation.add_property("frameType", tf.label)
            timeframe_annotation.add_property("score", tf.score)
            timeframe_annotation.add_property("scores", tf.scores)
            timepoint_annotations = []
            for prediction in tf.targets:
                timepoint_annotation = new_view.new_annotation(AnnotationTypes.TimePoint)
                prediction.annotation = timepoint_annotation
                scores = [prediction.score_for_label(lbl) for lbl in prediction.labels]
                label = self._label_with_highest_score(prediction.labels, scores)
                timepoint_annotation.add_property('timePoint', prediction.timepoint)
                timepoint_annotation.add_property('label', label)
                timepoint_annotation.add_property('labels', prediction.labels)
                timepoint_annotation.add_property('scores', scores)
                timepoint_annotations.append(timepoint_annotation)
            timeframe_annotation.add_property(
                'targets', [tp.id for tp in timepoint_annotations])
            reps = [p.annotation.id for p in tf.representative_predictions()]
            timeframe_annotation.add_property("representatives", reps)
            # print(timeframe_annotation.serialize(pretty=True))

        return mmif

    def _export_parameters(self, parameters: dict):
        """Export the parameters to the Classifier and Stitcher instances."""
        for parameter, value in parameters.items():
            if parameter == "sampleRate":
            if parameter == "startAt":
                self.classifier.start_at = value
            elif parameter == "stopAt":
                self.classifier.stop_at = value
            elif parameter == "sampleRate":
                self.classifier.sample_rate = value
                self.stitcher.sample_rate = value
            elif parameter == "minFrameScore":
@@ -61,19 +98,13 @@ def _annotate(self, mmif: Union[str, dict, Mmif], **parameters) -> Mmif:
elif parameter == "minFrameCount":
self.stitcher.min_frame_count = value

predictions = self.classifier.process_video(vd.location)
timeframes = self.stitcher.create_timeframes(predictions)
def _label_with_highest_score(self, labels: list, scores: list) -> str:
"""Return the label associated with the highest scores. The score for
labels[i] is scores[i]."""
# TODO: now the NEG scores are included, perhaps not do that
sorted_scores = list(sorted(zip(scores, labels), reverse=True))
return sorted_scores[0][1]

        new_view.new_contain(
            AnnotationTypes.TimeFrame, document=vd.id, timeUnit='milliseconds')
        for tf in timeframes:
            timeframe_annotation = new_view.new_annotation(AnnotationTypes.TimeFrame)
            timeframe_annotation.add_property("start", tf.start)
            timeframe_annotation.add_property("end", tf.end)
            timeframe_annotation.add_property("frameType", tf.label),
            timeframe_annotation.add_property("score", tf.score)

        return mmif


if __name__ == "__main__":
16 changes: 14 additions & 2 deletions metadata.py
@@ -28,11 +28,23 @@ def appmetadata() -> AppMetadata:
    )

    metadata.add_input(DocumentTypes.VideoDocument, required=True)
    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds')

    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds', frameType='bars')
    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds', frameType='slate')
    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds', frameType='chyron')
    metadata.add_output(AnnotationTypes.TimeFrame, timeUnit='milliseconds', frameType='credits')
    metadata.add_output(AnnotationTypes.TimePoint, timeUnit='milliseconds')

    # TODO: defaults are the same as in modeling/config/classifier.yml, which is possibly
    # not a great idea, should perhaps read defaults from the configuration file. There is
    # also a movement afoot to get rid of the configuration file.
    metadata.add_parameter(
        name='startAt', type='integer', default=0,
        description='Number of milliseconds into the video to start processing')
    metadata.add_parameter(
        # 10M ms is almost 3 hours, that should do; this is better than sys.maxint
        # (also, I tried using default=None, but that made stopAt a required property)
        name='stopAt', type='integer', default=10000000,
        description='Number of milliseconds into the video to stop processing')
    metadata.add_parameter(
        name='sampleRate', type='integer', default=1000,
        description='Milliseconds between sampled frames')