Merge pull request #219 from clamsproject/develop

releasing 1.0.1
clamsproject · Feb 10, 2024 · 7019c85 · 7019c85
2 parents 82e7e83 + fcb4046
commit 7019c85
Show file tree

Hide file tree

Showing 50 changed files with 5,778 additions and 125 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,10 @@ The format is loosely based on [Keep a Changelog](http://keepachangelog.com/). L
 
 This file documents changes made to the MMIF specification. Version names used to start with `spec-` because the Python MMIF SDK was also maintained in this repository. Starting with version 0.2.2 the repository was split and the prefix was discarded.
 
+## Version 1.0.1 - 2024-02-07
+- vocabulary types now have a `similarTo` field that links to similar type definitions by URI (https://github.com/clamsproject/mmif/issues/203).
+- updated `TimeFrame` definition to ease `frameType` value restrictions (https://github.com/clamsproject/mmif/issues/207).
+
 ## Version 1.0.0 - 2023-05-26 
 
 - Re-release of 0.5.0 (our last release candidate) as 1.0.0 stable version. 

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-1.0.0
+1.0.1
diff --git a/build.py b/build.py
@@ -322,8 +322,6 @@ def _add_head(self, cur_vocab_ver) -> None:
             dtrs.append(HREF('/'.join(['..'] * len(uri_suffix) + uri_suffix), n['name']))
             dtrs.append(SPAN('>'))
         dtrs.append(SPAN(self.clams_type['name']))
-        latest = tag('p', text=f'from {cur_vocab_ver} (last updated)')
-        dtrs.append(latest)
         p = tag('p', {'class': 'head'}, dtrs=dtrs)
         self.main_content.append(p)
         self._add_space()
@@ -350,6 +348,9 @@ def get_identity_row(identity_url):
         elif self.clams_type['version'] == 'v2' and self.clams_type['name'] == 'Annotation':
             children.append(
                 get_identity_row(f'https://mmif.clams.ai/0.4.2/vocabulary/{self.clams_type["name"]}/'))
+        if 'similarTo' in self.clams_type:
+            for s in self.clams_type['similarTo']:
+                children.append(TABLE_ROW([tag('td', text='Similar to'), tag('td', dtrs=[HREF(s, s)])]))
         table = TABLE(dtrs=children)
         self.main_content.append(table)
 
@@ -398,8 +399,8 @@ def _add_properties_aux(self, properties) -> None:
     def _add_header(self) -> None:
         header = DIV({'id': 'pageHeader'},
                      dtrs=[
-                         H1(f'{VOCAB_TITLE}'), 
-                         H2(f'{self.clams_type["name"]} ({self.clams_type["version"]})'),
+                         H1(f'{self.clams_type["name"]} ({self.clams_type["version"]})'),
+                         H2(f'{VOCAB_TITLE}'), 
                          ])
         self.intro.append(header)
 
@@ -527,14 +528,44 @@ def build_vocab(src, index_dir, mmif_version, item_dir) -> Tree:
                 attype_versions_included[attypename][attypever].append(old_ver)
 
     old_types = {t['name']: t for t in last_clams_types}
+    tree = Tree(new_clams_types)
+
+    def how_different(type1, type2):
+        """
+        return 0 if the types are the same, 
+        1 if the differences should be propagated to the children
+        2 if the types are different in description and parent-ship only (no propagation),
+        """
+        for inheritable in ('properties', 'metadata'):
+            if type1.get(inheritable, {}) != type2.get(inheritable, {}):
+                return 1
+        if type1['description'] != type2['description'] or type1['parent'] != type2['parent']:
+            return 2
+        return 0
+
+    updated = collections.defaultdict(lambda: False)
+
+    def propagate_version_changes(node, parent_changed=False):
+        if parent_changed:
+            updated[node['name']] = True
+            for child in node['childNodes']:
+                propagate_version_changes(child, True)
+        else:
+            difference = how_different(node, old_types[node['name']])
+            if difference > 0:
+                updated[node['name']] = True
+            for child in node['childNodes']:
+                propagate_version_changes(child, difference == 1)
+
+    root = tree.root
+    propagate_version_changes(root, False)
+
     for t in new_clams_types:
         v = latest_attype_vers[t['name']]
-        if t != old_types[t['name']]:
+        if updated[t['name']]:
             v += 1
         t['version'] = format_attype_version(v)
 
-    tree = Tree(new_clams_types)
-
     # the main `x.y.z/vocabulary/index.html` page with the vocab tree
     IndexPage(tree, index_dir, mmif_version).write()
     # then, redirection HTML files for each vocab types to its own versioned html page

diff --git a/docs/1.0.1/index.md b/docs/1.0.1/index.md
diff --git a/docs/1.0.1/pi78oGjdT-annotated.jpg b/docs/1.0.1/pi78oGjdT-annotated.jpg
diff --git a/docs/1.0.1/pi78oGjdT.jpg b/docs/1.0.1/pi78oGjdT.jpg
diff --git a/docs/1.0.1/samples/bars-tones-slates/index.md b/docs/1.0.1/samples/bars-tones-slates/index.md
@@ -0,0 +1,33 @@
+---
+layout: page
+title: MMIF Specification
+subtitle: Version 1.0.1
+---
+
+# Example: Bars and Tones and Slates
+
+To see the full example, scroll down to the end or open the [raw JSON file](raw.json).
+
+This is a minimal example that contains two media documents, one pointing at a video and the other at a transcript. For the first document there are two views, one with bars-and-tone annotations and one with slate annotations. For the second document there is one view with the results of a tokenizer. This example file, while minimal, has everything required by MMIF.
+
+Some notes:
+
+- The metadata just specify the MMIF version.
+- Both media documents in the *documents* list refer to a location on a local disk or a mounted disk. If this document is not on a local disk or mounted disk then URLs should be used. 
+- Each view has some metadata spelling out several kinds of things:
+  - The application that created the view.
+  - A timestamp of when the view was created.
+  - What kind of annotations are in the view and what metadata are there on those annotations (for example, in the view with id=v2, the *contains* field has a property "http://mmif.clams.ai/vocabulary/TimeFrame/v2" with a dictionary as the value, and that dictionary contains the metadata). Here the metadata specify what document the annotations are over and what unit is used for annotation offsets.
+
+Only one annotation is shown for each view; this is to keep the file as small as possible. Of course, the bars-and-tones and slate views often have only one annotation, so it is likely only the tokens view where annotations were left out.
+
+
+
+## Full MMIF File
+
+```json
+{% include_relative raw.json %}
+```
+
+
+
diff --git a/docs/1.0.1/samples/bars-tones-slates/raw.json b/docs/1.0.1/samples/bars-tones-slates/raw.json
@@ -0,0 +1,96 @@
+{
+  "metadata": {
+    "mmif": "http://mmif.clams.ai/1.0.1"
+  },
+  "documents": [
+    {
+      "@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
+      "properties": {
+        "id": "m1",
+        "mime": "video/mp4",
+        "location": "file:///var/archive/video-0012.mp4"
+      }
+    },
+    {
+      "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1",
+      "properties": {
+        "id": "m2",
+        "mime": "text/plain",
+        "location": "file:///var/archive/video-0012-transcript.txt"
+      }
+    }
+  ],
+  "views": [
+    {
+      "id": "v1",
+      "metadata": {
+        "app": "http://apps.clams.ai/bars-and-tones/1.0.5",
+        "timestamp": "2020-05-27T12:23:45",
+        "contains": {
+          "http://mmif.clams.ai/vocabulary/TimeFrame/v2": {
+            "document": "m1",
+            "timeUnit": "seconds"
+          }
+        }
+      },
+      "annotations": [
+        {
+          "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2",
+          "properties": {
+            "id": "s1",
+            "start": 0,
+            "end": 5,
+            "frameType": "bars-and-tones"
+          }
+        }
+      ]
+    },
+    {
+      "id": "v2",
+      "metadata": {
+        "app": "http://apps.clams.ai/slates/1.0.3",
+        "timestamp": "2020-05-27T12:23:45",
+        "contains": {
+          "http://mmif.clams.ai/vocabulary/TimeFrame/v2": {
+            "document": "m1",
+            "timeUnit": "seconds"
+          }
+        }
+      },
+      "annotations": [
+        {
+          "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2",
+          "properties": {
+            "id": "s1",
+            "start": 25,
+            "end": 38,
+            "frameType": "slate"
+          }
+        }
+      ]
+    },
+    {
+      "id": "v3",
+      "metadata": {
+        "app": "http://apps.clams.ai/spacy/1.3.0",
+        "timestamp": "2020-05-27T12:25:15",
+        "contains": {
+          "http://vocab.lappsgrid.org/Token": {
+            "document": "m2"
+          }
+        }
+      },
+      "annotations": [
+        {
+          "@type": "http://vocab.lappsgrid.org/Token",
+          "properties": {
+            "id": "s1",
+            "start": 0,
+            "end": 3,
+            "word": "The"
+          }
+        }
+      ]
+    }
+  ]
+}