Skip to content

Commit

Permalink
Merge pull request #219 from clamsproject/develop
Browse files Browse the repository at this point in the history
releasing 1.0.1
  • Loading branch information
keighrim authored Feb 10, 2024
2 parents 82e7e83 + fcb4046 commit 7019c85
Show file tree
Hide file tree
Showing 50 changed files with 5,778 additions and 125 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ The format is loosely based on [Keep a Changelog](http://keepachangelog.com/). L

This file documents changes made to the MMIF specification. Version names used to start with `spec-` because the Python MMIF SDK was also maintained in this repository. Starting with version 0.2.2 the repository was split and the prefix was discarded.

## Version 1.0.1 - 2024-02-07
- vocabulary types now have a `similarTo` field that links to similar type definitions by URI (https://github.com/clamsproject/mmif/issues/203).
- updated `TimeFrame` definition to ease `frameType` value restrictions (https://github.com/clamsproject/mmif/issues/207).

## Version 1.0.0 - 2023-05-26

- Re-release of 0.5.0 (our last release candidate) as 1.0.0 stable version.
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.0
1.0.1
45 changes: 38 additions & 7 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,6 @@ def _add_head(self, cur_vocab_ver) -> None:
dtrs.append(HREF('/'.join(['..'] * len(uri_suffix) + uri_suffix), n['name']))
dtrs.append(SPAN('>'))
dtrs.append(SPAN(self.clams_type['name']))
latest = tag('p', text=f'from {cur_vocab_ver} (last updated)')
dtrs.append(latest)
p = tag('p', {'class': 'head'}, dtrs=dtrs)
self.main_content.append(p)
self._add_space()
Expand All @@ -350,6 +348,9 @@ def get_identity_row(identity_url):
elif self.clams_type['version'] == 'v2' and self.clams_type['name'] == 'Annotation':
children.append(
get_identity_row(f'https://mmif.clams.ai/0.4.2/vocabulary/{self.clams_type["name"]}/'))
if 'similarTo' in self.clams_type:
for s in self.clams_type['similarTo']:
children.append(TABLE_ROW([tag('td', text='Similar to'), tag('td', dtrs=[HREF(s, s)])]))
table = TABLE(dtrs=children)
self.main_content.append(table)

Expand Down Expand Up @@ -398,8 +399,8 @@ def _add_properties_aux(self, properties) -> None:
def _add_header(self) -> None:
header = DIV({'id': 'pageHeader'},
dtrs=[
H1(f'{VOCAB_TITLE}'),
H2(f'{self.clams_type["name"]} ({self.clams_type["version"]})'),
H1(f'{self.clams_type["name"]} ({self.clams_type["version"]})'),
H2(f'{VOCAB_TITLE}'),
])
self.intro.append(header)

Expand Down Expand Up @@ -527,14 +528,44 @@ def build_vocab(src, index_dir, mmif_version, item_dir) -> Tree:
attype_versions_included[attypename][attypever].append(old_ver)

old_types = {t['name']: t for t in last_clams_types}
tree = Tree(new_clams_types)

def how_different(type1, type2):
    """
    Classify the difference between two definitions of a type.

    Returns 1 when the ``properties`` or ``metadata`` entries differ (a
    missing entry is treated as an empty dict); these are the differences
    that should be propagated to the children.
    Returns 2 when only the description or the parent differs (no
    propagation).
    Returns 0 when none of the above differ.
    """
    inheritable_changed = any(
        type1.get(key, {}) != type2.get(key, {})
        for key in ('properties', 'metadata')
    )
    if inheritable_changed:
        return 1
    if type1['description'] == type2['description'] and type1['parent'] == type2['parent']:
        return 0
    return 2

updated = collections.defaultdict(lambda: False)

def propagate_version_changes(node, parent_changed=False):
# Walk the type tree from `node` downwards and record in `updated` every type name that is considered changed.
# NOTE(review): indentation is not preserved in this view, so the nesting described in these comments is inferred from statement order -- confirm against build.py.
# `parent_changed` is True when the caller decided that a change above this node must be passed down to it.
if parent_changed:
# A passed-down change marks this node and, recursively, every descendant.
updated[node['name']] = True
for child in node['childNodes']:
propagate_version_changes(child, True)
else:
# Compare with the previous definition of the same name; a name missing from `old_types` raises KeyError here.
difference = how_different(node, old_types[node['name']])
if difference > 0:
updated[node['name']] = True
for child in node['childNodes']:
# Only a difference of 1 is handed to the children as a parent change.
propagate_version_changes(child, difference == 1)

root = tree.root
propagate_version_changes(root, False)

for t in new_clams_types:
v = latest_attype_vers[t['name']]
if t != old_types[t['name']]:
if updated[t['name']]:
v += 1
t['version'] = format_attype_version(v)

tree = Tree(new_clams_types)

# the main `x.y.z/vocabulary/index.html` page with the vocab tree
IndexPage(tree, index_dir, mmif_version).write()
# then, redirection HTML files for each vocab types to its own versioned html page
Expand Down
551 changes: 551 additions & 0 deletions docs/1.0.1/index.md

Large diffs are not rendered by default.

Binary file added docs/1.0.1/pi78oGjdT-annotated.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/1.0.1/pi78oGjdT.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
33 changes: 33 additions & 0 deletions docs/1.0.1/samples/bars-tones-slates/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
layout: page
title: MMIF Specification
subtitle: Version 1.0.1
---

# Example: Bars and Tones and Slates

To see the full example scroll down to the end or open the [raw json file](raw.json).

This is a minimal example that contains two media documents, one pointing at a video and the other at a transcript. For the first document there are two views, one with bars-and-tone annotations and one with slate annotations. For the second document there is one view with the results of a tokenizer. This example file, while minimal, has everything required by MMIF.

Some notes:

- The metadata just specify the MMIF version.
- Both media documents in the *documents* list refer to a location on a local disk or a mounted disk. If this document is not on a local disk or mounted disk then URLs should be used.
- Each view has some metadata spelling out several kinds of things:
- The application that created the view.
- A timestamp of when the view was created.
- What kind of annotations are in the view and what metadata are there on those annotations (for example, in the view with id=v2, the *contains* field has a property "http://mmif.clams.ai/vocabulary/TimeFrame/v2" with a dictionary as the value and that dictionary contains the metadata). Here the metadata specify what document the annotations are over and what unit is used for annotation offsets.

Only one annotation is shown for each view; this is to keep the file as small as possible. Of course, the bars-and-tones and slate views often have only one annotation, so it is likely only the tokens view where annotations were left out.



## Full MMIF File

```json
{% include_relative raw.json %}
```



96 changes: 96 additions & 0 deletions docs/1.0.1/samples/bars-tones-slates/raw.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
{
"metadata": {
"mmif": "http://mmif.clams.ai/1.0.1"
},
"documents": [
{
"@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
"properties": {
"id": "m1",
"mime": "video/mp4",
"location": "file:///var/archive/video-0012.mp4"
}
},
{
"@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1",
"properties": {
"id": "m2",
"mime": "text/plain",
"location": "file:///var/archive/video-0012-transcript.txt"
}
}
],
"views": [
{
"id": "v1",
"metadata": {
"app": "http://apps.clams.ai/bars-and-tones/1.0.5",
"timestamp": "2020-05-27T12:23:45",
"contains": {
"http://mmif.clams.ai/vocabulary/TimeFrame/v2": {
"document": "m1",
"timeUnit": "seconds"
}
}
},
"annotations": [
{
"@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2",
"properties": {
"id": "s1",
"start": 0,
"end": 5,
"frameType": "bars-and-tones"
}
}
]
},
{
"id": "v2",
"metadata": {
"app": "http://apps.clams.ai/slates/1.0.3",
"timestamp": "2020-05-27T12:23:45",
"contains": {
"http://mmif.clams.ai/vocabulary/TimeFrame/v2": {
"document": "m1",
"timeUnit": "seconds"
}
}
},
"annotations": [
{
"@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2",
"properties": {
"id": "s1",
"start": 25,
"end": 38,
"frameType": "slate"
}
}
]
},
{
"id": "v3",
"metadata": {
"app": "http://apps.clams.ai/spacy/1.3.0",
"timestamp": "2020-05-27T12:25:15",
"contains": {
"http://vocab.lappsgrid.org/Token": {
"document": "m2"
}
}
},
"annotations": [
{
"@type": "http://vocab.lappsgrid.org/Token",
"properties": {
"id": "s1",
"start": 0,
"end": 3,
"word": "The"
}
}
]
}
]
}
Loading

0 comments on commit 7019c85

Please sign in to comment.