diff --git a/CHANGELOG.md b/CHANGELOG.md index 1834817c..22065316 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is loosely based on [Keep a Changelog](http://keepachangelog.com/). L This file documents changes made to the MMIF specification. Version names used to start with `spec-` because the Python MMIF SDK was also maintained in this repository. Starting with version 0.2.2 the repository was split and the prefix was discarded. +## Version 1.0.1 - 2024-02-07 +- vocabulary types now have `similarTo` field to link similar type definitions as URI (https://github.com/clamsproject/mmif/issues/203). +- updated `TimeFrame` definition to ease `frameType` value restrictions (https://github.com/clamsproject/mmif/issues/207). + ## Version 1.0.0 - 2023-05-26 - Re-release of 0.5.0 (our last release candidate) as 1.0.0 stable version. diff --git a/VERSION b/VERSION index 3eefcb9d..7dea76ed 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.0 +1.0.1 diff --git a/build.py b/build.py index c3b34bf9..8a00a4ae 100644 --- a/build.py +++ b/build.py @@ -322,8 +322,6 @@ def _add_head(self, cur_vocab_ver) -> None: dtrs.append(HREF('/'.join(['..'] * len(uri_suffix) + uri_suffix), n['name'])) dtrs.append(SPAN('>')) dtrs.append(SPAN(self.clams_type['name'])) - latest = tag('p', text=f'from {cur_vocab_ver} (last updated)') - dtrs.append(latest) p = tag('p', {'class': 'head'}, dtrs=dtrs) self.main_content.append(p) self._add_space() @@ -350,6 +348,9 @@ def get_identity_row(identity_url): elif self.clams_type['version'] == 'v2' and self.clams_type['name'] == 'Annotation': children.append( get_identity_row(f'https://mmif.clams.ai/0.4.2/vocabulary/{self.clams_type["name"]}/')) + if 'similarTo' in self.clams_type: + for s in self.clams_type['similarTo']: + children.append(TABLE_ROW([tag('td', text='Similar to'), tag('td', dtrs=[HREF(s, s)])])) table = TABLE(dtrs=children) self.main_content.append(table) @@ -398,8 +399,8 @@ def _add_properties_aux(self, properties) -> None: def _add_header(self) -> None: header = DIV({'id': 'pageHeader'}, dtrs=[ - H1(f'{VOCAB_TITLE}'), - H2(f'{self.clams_type["name"]} ({self.clams_type["version"]})'), + H1(f'{self.clams_type["name"]} ({self.clams_type["version"]})'), + H2(f'{VOCAB_TITLE}'), ]) self.intro.append(header) @@ -527,14 +528,44 @@ def build_vocab(src, index_dir, mmif_version, item_dir) -> Tree: attype_versions_included[attypename][attypever].append(old_ver) old_types = {t['name']: t for t in last_clams_types} + tree = Tree(new_clams_types) + + def how_different(type1, type2): + """ + return 0 if the types are the same, + 1 if the differences should be propagated to the children + 2 if the types are different in description and parent-ship only (no propagation), + """ + for inheritable in ('properties', 'metadata'): + if type1.get(inheritable, {}) != type2.get(inheritable, {}): + return 1 + if type1['description'] != type2['description'] or type1['parent'] != type2['parent']: + return 2 + return 0 + + updated = collections.defaultdict(lambda: False) + + def propagate_version_changes(node, parent_changed=False): + if parent_changed: + updated[node['name']] = True + for child in node['childNodes']: + propagate_version_changes(child, True) + else: + difference = how_different(node, old_types[node['name']]) + if difference > 0: + updated[node['name']] = True + for child in node['childNodes']: + propagate_version_changes(child, difference == 1) + + root = tree.root + propagate_version_changes(root, False) + for t in new_clams_types: v = 
latest_attype_vers[t['name']] - if t != old_types[t['name']]: + if updated[t['name']]: v += 1 t['version'] = format_attype_version(v) - tree = Tree(new_clams_types) - # the main `x.y.z/vocabulary/index.html` page with the vocab tree IndexPage(tree, index_dir, mmif_version).write() # then, redirection HTML files for each vocab types to its own versioned html page diff --git a/docs/1.0.1/index.md b/docs/1.0.1/index.md new file mode 100644 index 00000000..7dea0c90 --- /dev/null +++ b/docs/1.0.1/index.md @@ -0,0 +1,551 @@ +--- +layout: page +title: MMIF Specification +subtitle: Version 1.0.1 +--- + +MMIF is an annotation format for audiovisual media and associated text like transcripts and closed captions. It is a JSON-LD format used to transport data between CLAMS apps and is inspired by and partially based on LIF, the [LAPPS Interchange Format](https://wiki.lappsgrid.org/interchange/). MMIF is pronounced *mif* or *em-mif*, or, if you like to hum, *mmmmmif*. + +MMIF consists of two formal components in addition to this more informal specification: +1. The JSON schema: + - [https://mmif.clams.ai/1.0.1/schema/mmif.json](schema/mmif.json) +1. The Vocabularies (the type hierarchies): + - [https://mmif.clams.ai/1.0.1/vocabulary](vocabulary) + - [http://vocab.lappsgrid.org](http://vocab.lappsgrid.org) + +The JSON schema for MMIF defines the syntactic elements of MMIF which will be explained at length in ["structure" section](#the-structure-of-mmif-files). These specifications often refer to elements from the CLAMS and LAPPS Vocabularies which define concepts and their ontological relations, see ["vocabulary" section](#mmif-and-the-vocabularies) for notes on those vocabularies. + +Along with the formal specifications and documentation we also provide a reference implementation of MMIF. It is developed in the Python programming language, and it will be distributed via GitHub (as source code) as well as via the [Python Package Index](https://pypi.org/) (as a Python library). The package will function as a software development kit (SDK), that helps users (mostly developers) to easily use various features of MMIF in developing their own applications. + +We use [semantic versioning](https://semver.org/) with the `major.minor.patch` version scheme. All formal components (this document, the JSON schema and CLAMS vocabulary) share the same version number, while the `mmif-python` Python SDK shares `major` and `minor` numbers with the specification version. See the [versioning notes](../versioning) for more information on compatibility between different versions and how it plays out when chaining CLAMS apps in a pipeline. + +## Table of Contents +{:.no_toc} + +1. toc placeholder +{:toc} + +## The format of MMIF files +As mentioned, MMIF is JSON in essence. When serialized to a physical file, the file must use **Unicode** charset encoded in **UTF-8**. + +## The structure of MMIF files + +The [JSON schema](schema/mmif.json) formally define the syntactic structure of a MMIF file. This section is an informal companion to the schema and gives further information. + +In essence, a MMIF file represents two things: + +1. Media like texts, videos, images and audio recordings. We will call these *documents*. +2. Annotations over those media representing information that was added by CLAMS processing. + +Annotations are always stored separately from the media. 
They can be directly linked to a slice in the media (a string in a text, a shape in an image, or a time frame in a video or audio) or they can refer to other annotations, for example to specify relations between text strings. More specifically, a MMIF file contains some metadata, a list of media and a list of annotation views, where each view contains a list of annotation types like Segment, BoundingBox, VideoObject or NamedEntity. + +The top-level structure of a MMIF file is as follows: + +```json +{ + "metadata": { + "mmif": "http://mmif.clams.ai/1.0.1" }, + "documents": [ ], + "views": [ ] +} +``` + +The `metadata` property stores metadata associated with the file. It is not heavily used for now, but we do use it to store the MMIF version used in the document. The `mmif` metadata property is required. You are allowed to add any other metadata properties. + + + +### The *documents* property + +We assume that when a MMIF document is initialized it is given a list of media and each of these media is either an external file or a text string. These media are all imported into the MMIF file as documents of a certain type and the specifications for each medium/document is stored in the `documents` list. This list is read-only and cannot be extended after initialization. There are no limits on how many documents and how many documents of what types are in the list, but typically there will be just a few documents in there. + +Here is an example document list with a video and its transcript: + +```json +{ + "documents": [ + { + "@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1", + "properties": { + "id": "m1", + "mime": "video/mpeg", + "location": "file:///var/archive/video-0012.mp4" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "m2", + "mime": "text/plain", + "location": "file:///var/archive/transcript-0012.txt" } + } + ] +} +``` + +The `@type` key has a special meaning in JSON-LD and it is used to define the type of data structure. In MMIF, the value should be a URL that points to a description of the type of document. Above we have a video and a text document and those types are described at [http://mmif.clams.ai/vocabulary/VideoDocument](vocabulary/VideoDocument) and [http://mmif.clams.ai/vocabulary/TextDocument](vocabulary/TextDocument) respectively. Currently, four document types are defined: *VideoDocument*, *TextDocument*, *ImageDocument* and *AudioDocument*. + +The description also lists the properties that can be used for a type, and above we have the `id`, `mime` and `location` properties, used for the document identifier, the document's MIME type and the location of the document, which is a URL. Should the document be a local file then the `file://` scheme must be used. 
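As a quick illustration of the `location` requirement, here is a minimal Python sketch that builds a document entry with plain dictionaries and the standard library rather than the `mmif-python` SDK; the file path is made up and a real pipeline would normally use the SDK's serialization classes instead.

```python
import json
from pathlib import Path

def video_document(doc_id: str, path: str, mime: str = "video/mpeg") -> dict:
    """Build a minimal VideoDocument entry, converting a local path to a file:// URL."""
    return {
        "@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
        "properties": {
            "id": doc_id,
            "mime": mime,
            # Path.as_uri() produces an absolute file:// URL, as required for local files.
            "location": Path(path).absolute().as_uri(),
        },
    }

print(json.dumps({"documents": [video_document("m1", "/var/archive/video-0012.mp4")]}, indent=2))
```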
Alternatively, and for text only, the document could be inline, in which case the element is represented as in the `text` property in LIF, using a JSON [value object](http://www.w3.org/TR/json-ld/#dfn-value-object) containing a `@value` key and optionally a `@language` key:

``` json
{
  "documents": [
    {
      "@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1",
      "properties": {
        "id": "m1",
        "mime": "video/mpeg",
        "location": "file:///var/archive/video-0012.mp4" }
    },
    {
      "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1",
      "properties": {
        "id": "m2",
        "text": {
          "@value": "Sue flew to Bloomington.",
          "@language": "en" } }
    }
  ]
}
```

The value associated with `@value` is a string and the value associated with `@language` follows the rules in [BCP47](http://www.w3.org/TR/json-ld/#bib-bcp47), which for our current purposes boils down to using the two-character ISO 639 code. With inline text no MIME type is needed.

### The *views* property

This is where all the annotations and associated metadata live. Views contain structured information about documents but are separate from those documents. The value of `views` is a JSON-LD array of view objects where each view specifies what documents the annotation is over, what information it contains and what app created that information. To that end, each view has three properties: `id`, `metadata` and `annotations`.

```json
{
  "views": [
    {
      "id": "v1",
      "metadata": { },
      "annotations": [ ]
    }
  ]
}
```

Each view has a unique identifier. Annotation elements in the view have identifiers unique to the view and these elements can be uniquely referred to from outside the view by using the view identifier and the annotation element identifier, separated by a colon. For example, if the view above has an annotation with identifier "a8" then it can be referred to from outside the view by using "v1:a8".

Here are a few general principles relevant to views:

1. There is no limit to the number of views.
2. Apps may create as many new views as they want.
3. Apps may not change, add, overwrite or delete information in existing views; that is, views are read-only, which has many advantages at the cost of some redundancy. This holds for the view's metadata as well as the annotations.
4. Annotations in views have identifiers that are unique to the view. Views have identifiers that uniquely define them relative to other views.

We now describe the metadata and the annotations.

#### The *view's metadata* property

This property contains information about the annotations in a view. Here is an example for a view over a video with medium identifier "m1" with segments added by the CLAMS bars-and-tones application:

```json
{
  "app": "http://apps.clams.ai/bars-and-tones/1.0.5",
  "timestamp": "2020-05-27T12:23:45",
  "contains": {
    "http://mmif.clams.ai/vocabulary/TimeFrame/v2": {
      "timeUnit": "seconds",
      "document": "m1"
    }
  },
  "parameters": {"threshold": "0.5", "not-defined-parameter": "some-value"}
}
```

The `timestamp` key stores when the view was created by the application. It uses the ISO 8601 format, where the T separates the date from the time of day. The timestamp can also be used to order views, which is significant because by default arrays in JSON-LD are not ordered.

The `app` key contains an identifier that specifies what application created the view.
The identifier must be a URL form, and HTTP webpage pointed by the URL should contain all app metadata information relevant for the application: description, configuration, input/output specifications and a more complete description of what output is created. The app identifier always includes a version number for the app. The metadata should also contain a link to the public code repository for the app (and that repository will actually maintain all the information in the URL). + +The `parameters` is a dictionary of runtime parameters and their *string* values, if any. The primary purpose of this dictionary is to record the parameters "as-is" for reproducibility and accountability. Note that CLAMS apps are developed to run as HTTP servers, expecting parameters to be passed as URL query strings. Hence, the values in the `parameters` dictionary are always strings or simple lists of strings. + +The `contains` dictionary has keys that refer to annotation objects in the CLAMS or LAPPS vocabulary, or user-defined objects. Namely, they indicate the kind of annotations that live in the view. The value of each of those keys is a JSON object which contains metadata specified for the annotation type. The example above has one key that indicates that the view contains *TimeFrame* annotations, and it gives two metadata values for that annotation type: + +1. The `document` key gives the identifier of the document that the annotations of that type in this view are over. As we will see later, annotations anchor into documents using keys like `start` and `end` and this property specifies what document that is. +2. The `timeUnit` key is set to "seconds" and this means that for each annotation the unit for the values in `start` and `end` are seconds. + +Every annotation type defined in the CLAMS vocabulary has two feature structures - `metadata` and `properties`. See [this definition of *TimeFrame*](vocabulary/TimeFrame/) type in the vocabulary for an example. As we see here, `contains` dictionary in a view's metadata is used to assign values to metadata keys. We'll see in the following section that individual annotation objects are used to assign values to `properties` keys. +{: .box-note} + +Note that when a property is set to some value in the `contains` in the view metadata then all annotations of that type should adhere to that value, in this case the `document` and `timeUnit` are set to *"m1"* and *"seconds"* respectively. In other words, the `contains` dictionary not only functions as an overview of the annotation types in this view, but also as a place for common metadata shared among annotations of a type. This is useful especially for `document` property, as in a single view, an app is likely to process only a limited number of source documents and resulting annotation objects will be anchored on those documents. It is technically possible for *TimeFrame* type to add `document` properties to individual annotation objects and overrule the metadata property, but this is not to be done without really good reasons. We get back to this later. + +For annotation types that are used to measure time (such as *TimePoint*, *TimeFrame*, or *VideoObject*), the unit of the measurement (`timeUnit`) must be specified in the `contains`. However, for objects that measure image regions (such as [*BoundingBox*](vocabulary/BoundingBox).`coordinates`), the *unit* is always assumed to be *pixels*. 
That is, a coordinate is numbers of pixels from a point in an image to the origin along all axes, where the origin (*(0,0)*) is always the top-left point of the image. Similarly, for objects that measure text spans (such as [*Span*](vocabulary/Span).start/end), the *unit* of counting characters must always be code points. As mentioned above, MMIF must be serialized to a UTF-8 Unicode file. +{: .box-note} + +Next section has more details on the interaction between the vocabulary and the metadata for the annotation types in the `contains` dictionary. + +When an app fails to process the input for any reason and produces an error, it can record the error in the `error` field, instead of in `contains`. When this happens, the annotation list of the view must remain empty. Here is an example of a view with an error. + +```json +{ + "id": "v1", + "metadata": { + "app": "http://apps.clams.ai/bars-and-tones/1.0.5", + "timestamp": "2020-05-27T12:23:45", + "error": { + "message": "FileNotFoundError: /data/input.mp4 from Document d1 is not found.", + "stackTrace": "Optionally, some-stack-traceback-information" + }, + "parameters": {} + }, + "annotations": [] +} +``` + +Finally, an app may produce one or more warnings and still successfully process input and create annotations. In that case one extra view is added that has no annotations and that instead of the `contains` field has a `warnings` field which presents the warning messages as a list of strings. + +```json +{ + "id": "v2", + "metadata": { + "app": "http://apps.clams.ai/bars-and-tones/1.0.5", + "timestamp": "2020-05-27T12:23:45", + "warnings": ["Missing parameter frameRate, using default value."], + "parameters": {} + }, + "annotations": [] +} +``` + + + +#### The *view's annotations* property + +The value of the `annotations` property on a view is a list of annotation objects. Here is an example of an annotation object: + +```json +{ + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "f1", + "start": 0, + "end": 5, + "frameType": "bars-and-tones" + } +} +``` + +The two required keys are `@type` and `properties`. As mentioned before, the `@type` key in JSON-LD is used to define the type of data structure. The `properties` dictionary typically contains the features defined for the annotation category as defined in the vocabularies at [CLAMS vocabulary ](vocabulary) or [LAPPS vocabulary](http://vocab.lappsgrid.org/). For example, for the *TimeFrame* annotation type the vocabulary includes the feature `frameType` as well as the inherited features `id`, `start` and `end`. Values should be as specified in the vocabulary, values typically are strings, identifiers and integers, or lists of strings, identifiers and integers. + +The `id` key should have a value that is unique relative to all annotation elements in the view. Other annotations can refer to this identifier either with just the identifier (for example “s1”), or the identifier with a view identifier prefix (for example “v1:s1”). If there is no prefix, the current view is assumed. + +We will discuss more details on annotation type vocabularies in the ["vocabulary" section](#mmif-and-the-vocabularies). +{: .box-note} + +The annotations list is shallow, that is, all annotations in a view are in that list and annotations are not embedded inside other annotations. For example, LAPPS *Constituent* annotations will not contain other *Constituent* annotations. 
However, in the `properties` dictionary annotations can refer to other annotations using the identifiers of the other annotations. + +Here is another example of a view containing two bounding boxes created by the EAST text recognition app: + +```json +{ + "id": "v1", + "metadata": { + "app": "http://apps.clams.io/east/1.0.4", + "timestamp": "2020-05-27T12:23:45", + "contains": { + "http://mmif.clams.ai/vocabulary/BoundingBox/v1": { + "document": "image3" + } + } + }, + "annotations": [ + { "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb0", + "coordinates": [[10,20], [60,20], [10,50], [60,50]] } + }, + { "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb1", + "coordinates": [[90,40], [110,40], [90,80], [110,80]] } + } + ] +} +``` + +Note how the `coordinates` property is a list of lists where each embedded list is a pair of an x-coordinate and a y-coordinate. + + + + +### Views with documents + +We have seen that an initial set of media is added to the MMIF `documents` list and that applications then create views from those documents. But some applications are special in that they create text from audiovisual data and the annotations they create are similar to the documents in the `documents` list in that they could be the starting point for a text processing chain. For example, Tesseract can take a bounding box in an image and generate text from it and a Named Entity Recognition (NER) component can take the text and extract entities, just like it would from a transcript or other text document in the `documents` list. + +Let's use an example of an image of a barking dog where a region of the image has been recognized by the EAST application as an image box containing text (image taken from [http://clipart-library.com/dog-barking-clipart.html](http://clipart-library.com/dog-barking-clipart.html)): + +yelp + +The result of this processing is a MMIF document with an image document and a view that contains a *BoundingBox* annotation where the bounding box has the `boxType` property set to "text": + +```json +{ + "documents": [ + { + "@type": "http://mmif.clams.ai/vocabulary/ImageDocument/v1", + "properties": { + "id": "m1", + "mime": "image/jpeg", + "location": "file:///var/archive/image-0012.jpg" } + } + ], + "views": [ + { + "id": "v1", + "metadata": { + "app": "http://mmif.clams.ai/apps/east/0.2.2", + "contains": { + "http://mmif.clams.ai/vocabulary/BoundingBox/v1": { + "document": "m1" } } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb1", + "coordinates": [[10,20], [40,20], [10,30], [40,30]], + "boxType": "text" } + } + ] + } + ] +} +``` + +Tesseract will then add a view to this MMIF document that contains a text document as well as an *Alignment* type that specifies that the text document is aligned with the bounding box from view "v1". 
```json
{
  "id": "v2",
  "metadata": {
    "app": "http://mmif.clams.ai/apps/tesseract/0.2.2",
    "contains": {
      "http://mmif.clams.ai/vocabulary/TextDocument/v1" : {},
      "http://mmif.clams.ai/vocabulary/Alignment/v1": {} }
  },
  "annotations": [
    {
      "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1",
      "properties": {
        "id": "td1",
        "text": {
          "@value": "yelp" } }
    },
    {
      "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1",
      "properties": {
        "source": "v1:bb1",
        "target": "td1" }
    }
  ]
}
```

The text document annotation is the same kind of object as the text document objects in the top-level `documents` property; it has the same type and uses the same properties. Notice also that the history of the text document, namely that it was derived from a particular bounding box in a particular image, can be traced via the alignment of the text document with the bounding box.

An alternative to using an alignment would be to use a `textSource` property on the document or perhaps to reuse the `location` property. That would require less space, but would introduce another way to align annotations.
{: .box-note}

Now this text document can be input to language processing. An NER component will not do anything interesting with this text so let's say we have a semantic typing component that has *"dog-sound"* as one of its categories. That hypothetical semantic typing component would add a new view to the list:

```json
{
  "id": "v3",
  "metadata": {
    "app": "http://mmif.clams.ai/apps/semantic-typer/0.2.4",
    "contains": {
      "http://vocab.lappsgrid.org/SemanticTag": {
        "document": "v2:td1" } }
  },
  "annotations": [
    {
      "@type": "http://vocab.lappsgrid.org/SemanticTag",
      "properties": {
        "id": "st1",
        "category": "dog-sound",
        "start": 0,
        "end": 4 }
    }
  ]
}
```

This view encodes that the span from character offset 0 to character offset 4 contains a semantic tag and that the category is "dog-sound". This type can be traced to *TextDocument* "td1" in view "v2" via the `document` metadata property, and from there to the bounding box in the image.

See ["examples" section](#mmif-examples) with the MMIF examples for a more realistic and larger example.

We are here abstracting away from how the actual processing would proceed since we are focusing on the representation. In short, the CLAMS platform knows what kind of input an application requires; it would know that an NLP application requires a *TextDocument* to run on, and it knows how to find all instances of *TextDocument* in a MMIF file.
{: .box-note}


### Multiple text documents in a view

The image with the dog in the previous section just had a bounding box for the part of the image with the word *yelp*, but there were three other image regions that could have been input to OCR as well.
With more boxes we just add more text documents and more alignments, here shown for one additional box: + +```json +{ + "id": "v2", + "metadata": { + "app": "http://mmif.clams.ai/apps/tesseract/1.0.1", + "contains": { + "http://mmif.clams.ai/vocabulary/TextDocument/v1": {}, + "http://mmif.clams.ai/vocabulary/Alignment/v1": {} } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td1", + "text": { + "@value": "yelp" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "source": "v1:bb1", + "target": "td1" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td2", + "text": { + "@value": "woof" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "source": "v1:bb2", + "target": "td2" } + } + ] +} +``` + +This of course assumes that view "v1" has a bounding box identified by "v1:bb2". + +Now if you run the semantic tagger you would get tags with the category set to "dog-sound": + +```json +{ + "id": "v3", + "metadata": { + "app": "http://mmif.clams.ai/apps/semantic-typer/0.2.4", + "contains": { + "http://mmif.clams.ai/vocabulary/SemanticTag/v1": {} } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/SemanticTag/v1", + "properties": { + "id": "st1", + "category": "dog-sound", + "document": "V2:td1", + "start": 0, + "end": 4 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/SemanticTag/v1", + "properties": { + "id": "st2", + "category": "dog-sound", + "document": "V2:td2", + "start": 0, + "end": 4 } + } + ] +} +``` + +Notice how the document to which the *SemanticTag* annotations point is not expressed by the metadata `document` property but by individual `document` properties on each semantic tag. This is unavoidable when we have multiple text documents that can be input to language processing. + +The above glances over the problem that we need some way for Tesseract to know what bounding boxes to take. We can do that by either introducing some kind of type or use the `app` property in the metadata or maybe by introducing a subtype for BoundingBox like TextBox. In general, we may need to solve what we never really solved for LAPPS which is what view should be used as input for an application. +{: .box-note} + + + +## MMIF and the Vocabularies + +The structure of MMIF files is defined in the [schema](schema/mmif.json) and described in this document. But the semantics of what is expressed in the views are determined by the [CLAMS Vocabulary](vocabulary). Each annotation in a view has two fields: `@type` and `properties`. The value of the first one is typically an annotation type from the vocabulary. Here is a *BoundingBox* annotation as an example: + +```json +{ + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb1", + "coordinates": [[0,0], [10,0], [0,10], [10,10]] + } +} +``` + +The value of `@type` refers to the URL [http://mmif.clams.ai/vocabulary/BoundingBox/v1](vocabulary/BoundingBox) which is a page in the published vocabulary. That page will spell out the definition of *BoundingBox* as well as list all properties defined for it, whether inherited or not. On the page we can see that `id` is a required property inherited from *Annotation* and that `coordinates` is a required property of *BoundingBox*. Both are expressed in the `properties` dictionary above. 
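A consumer can use those vocabulary pages to sanity-check annotations. The sketch below hard-codes the required-property set for *BoundingBox* purely for illustration; in practice the lists would come from the vocabulary pages themselves or from the `mmif-python` SDK.

```python
# Required-property table, hard-coded here for illustration only.
REQUIRED = {
    "http://mmif.clams.ai/vocabulary/BoundingBox/v1": {"id", "coordinates"},
}

def missing_required(annotation: dict) -> set:
    """Return the names of required properties that the annotation does not carry."""
    required = REQUIRED.get(annotation["@type"], set())
    props = annotation.get("properties", {})
    return {name for name in required if name not in props}

bbox = {
    "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1",
    "properties": {"id": "bb1", "coordinates": [[0, 0], [10, 0], [0, 10], [10, 10]]},
}
assert missing_required(bbox) == set()
```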
The page also says that there is an optional property `timePoint`, but it is not used above. + +You might also have noticed by now that these URL-formatted values to this key end with some version number (e.g. `/v1`), which is different from the version of this document. That is because each individual annotation type (and document type in `documents` list) has its own version independent of the MMIF version. The independent versioning of annotation types enables type checking mechanism in CLAMS pipelines. See [versioning notes](../versioning) for more details. + +As displayed in the vocabulary, annotation types are hierarchically structured with `is-a` inheritance relations. That is, all properties from a parent type are *inherited* to their children. The top-level type in the CLAMS vocabulary is [http://mmif.clams.ai/vocabulary/Annotation](vocabulary/Annotation), and it can be generally used for attaching a piece of information (annotation) to a source document, using `document` property to indicate the source document. If an annotation is specifically about (or derived from) a part of the document (for example, a certain sentence in the text or a certain area of the image, etc.), one should consider one of the *Annotation*'s children that can anchor to the part that suits semantics and purpose of the annotation. Again, the annotation object can (and probably should) use the `document` property with a source document identifier, as long as the type is a sub-type of the *Annotation*. We will see concrete examples in the below. + +The [http://mmif.clams.ai/vocabulary/Thing](vocabulary/Thing) type is designed only as a placeholder and is not intended to be used to represent actual annotations. +{: .box-note} + +The vocabulary also defines `metadata` properties. For example, the optional property `timeUnit` can be used for a *TimeFrame* to specify what unit is used for the start and end time points in instances of *TimeFrame*. This property is not expressed in the annotation but in the metadata of the view with the annotation type in the `contains` dictionary: + +As aforementioned, the *Annotation* type and its children can put the source document identifier in the `contains` dictionary, using `document` metadata property. Namely, there are two ways to express the source document of annotations: at individual object level or at the view level. Unless there is a good reason to specify document information for each and every annotation objects, using the view-level representation is recommended to save space when the MMIF is serialized to a JSON file. +{: .box-warning} + +```json +{ + "metadata": { + "app": "http://apps.clams.ai/some_time_segmentation_app/1.0.3", + "timestamp": "2020-05-27T12:23:45", + "contains": { + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "document": "m12", + "timeUnit": "milliseconds" } } + } +} +``` + +Annotations in a MMIF file often refer to the LAPPS Vocabulary at [http://vocab.lappsgrid.org](http://vocab.lappsgrid.org). In that case, the annotation type in `@type` will refer to a URL just as with CLAMS annotation types, the only difference is that the URL will be in the LAPPS Vocabulary. Properties and metadata properties of LAPPS annotation types are defined and used the same way as described above for CLAMS types. + +Using a LAPPS type is actually an instance of the more general notion that the value of `@type` can be any URL (actually, any IRI). 
You can use any annotation category defined elsewhere, for example, you can use categories defined by the creator of an application or categories from other vocabularies. Here is an example with a type from [https://schema.org](https://schema.org): + +```json +{ + "@type": "https://schema.org/Clip", + "properties": { + "id": "clip-29", + "actor": "Geena Davis" + } +} +``` + +This assumes that [https://schema.org/Clip](https://schema.org/Clip) defines all the features used in the `properties` dictionary. One little disconnect here is that in MMIF we insist on each annotation having an identifier in the `id` property and as it happens [https://schema.org](https://schema.org) does not define an `id` attribute, although it does define `identifier`. + +The CLAMS Platform does not require that a URL like [https://schema.org/Clip](https://schema.org/Clip) actually exists, but if it doesn't users of an application that creates the *Clip* type will not know exactly what the application creates. + + + +## MMIF Examples + +To finish off this document we provide some examples of complete MMIF documents: + + +| example | description | +| --------------------------------------------------------- | ------------------------------------------------------------ | +| [bars-tones-slates](samples/bars-tones-slates) | A couple of time frames and some minimal text processing on a transcript. | +| [east-tesseract-typing](samples/east-tesseract-typing) | EAST text box recognition followed by Tesseract OCR and semantic typing. | +| [segmenter-kaldi-ner](samples/segmenter-kaldi-ner) | Audio segmentation followed by Kaldi speech recognition and NER. | +| [everything](samples/everything) | A big MMIF example with various multimodal AI apps for video/audio as well as text. | + +Each example has some comments and a link to a raw JSON file. + +As we move along integrating new applications, other examples will be added with other kinds of annotation types. + diff --git a/docs/1.0.1/pi78oGjdT-annotated.jpg b/docs/1.0.1/pi78oGjdT-annotated.jpg new file mode 100644 index 00000000..88875a55 Binary files /dev/null and b/docs/1.0.1/pi78oGjdT-annotated.jpg differ diff --git a/docs/1.0.1/pi78oGjdT.jpg b/docs/1.0.1/pi78oGjdT.jpg new file mode 100644 index 00000000..69e03023 Binary files /dev/null and b/docs/1.0.1/pi78oGjdT.jpg differ diff --git a/docs/1.0.1/samples/bars-tones-slates/index.md b/docs/1.0.1/samples/bars-tones-slates/index.md new file mode 100644 index 00000000..bc52b80b --- /dev/null +++ b/docs/1.0.1/samples/bars-tones-slates/index.md @@ -0,0 +1,33 @@ +--- +layout: page +title: MMIF Specification +subtitle: Version 1.0.1 +--- + +# Example: Bars and Tones and Slates + +To see the full example scroll down to the end or open the [raw json file](raw.json). + +This is a minimal example that contains two media documents, one pointing at a video and the other at a transcript. For the first document there are two views, one with bars-and-tone annotations and one with slate annotations. For the second document there is one view with the results of a tokenizer. This example file, while minimal, has everything required by MMIF. + +Some notes: + +- The metadata just specify the MMIF version. +- Both media documents in the *documents* list refer to a location on a local disk or a mounted disk. If this document is not on a local disk or mounted disk then URLs should be used. +- Each view has some metadata spelling out several kinds of things: + - The application that created the view. + - A timestamp of when the view was created. 
+ - What kind of annotations are in the view and what metadata are there on those annotations (for example, in the view with id=v2, the *contains* field has a property "http://mmif.clams.ai/vocabulary/TimeFrame/v2" with a dictionary as the value and that dictionary contains the metadata. Here the metadata specify what document the annotations are over what unit is used for annotation offsets. + +Only one annotation is shown for each view, this is to keep the file as small as possible. Of course, often the bars-and-tones and slate views often have only one annotation so it is likely only the tokens view where annotations were left out. + + + +## Full MMIF File + +```json +{% include_relative raw.json %} +``` + + + diff --git a/docs/1.0.1/samples/bars-tones-slates/raw.json b/docs/1.0.1/samples/bars-tones-slates/raw.json new file mode 100644 index 00000000..7dc69067 --- /dev/null +++ b/docs/1.0.1/samples/bars-tones-slates/raw.json @@ -0,0 +1,96 @@ +{ + "metadata": { + "mmif": "http://mmif.clams.ai/1.0.1" + }, + "documents": [ + { + "@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1", + "properties": { + "id": "m1", + "mime": "video/mp4", + "location": "file:///var/archive/video-0012.mp4" + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "m2", + "mime": "text/plain", + "location": "file:///var/archive/video-0012-transcript.txt" + } + } + ], + "views": [ + { + "id": "v1", + "metadata": { + "app": "http://apps.clams.ai/bars-and-tones/1.0.5", + "timestamp": "2020-05-27T12:23:45", + "contains": { + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "document": "m1", + "timeUnit": "seconds" + } + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "s1", + "start": 0, + "end": 5, + "frameType": "bars-and-tones" + } + } + ] + }, + { + "id": "v2", + "metadata": { + "app": "http://apps.clams.ai/slates/1.0.3", + "timestamp": "2020-05-27T12:23:45", + "contains": { + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "document": "m1", + "timeUnit": "seconds" + } + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "s1", + "start": 25, + "end": 38, + "frameType": "slate" + } + } + ] + }, + { + "id": "v3", + "metadata": { + "app": "http://apps.clams.ai/spacy/1.3.0", + "timestamp": "2020-05-27T12:25:15", + "contains": { + "http://vocab.lappsgrid.org/Token": { + "document": "m2" + } + } + }, + "annotations": [ + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "s1", + "start": 0, + "end": 3, + "word": "The" + } + } + ] + } + ] +} diff --git a/docs/1.0.1/samples/east-tesseract-typing/index.md b/docs/1.0.1/samples/east-tesseract-typing/index.md new file mode 100644 index 00000000..e1d08c04 --- /dev/null +++ b/docs/1.0.1/samples/east-tesseract-typing/index.md @@ -0,0 +1,177 @@ +--- +layout: page +title: MMIF Specification +subtitle: Version 1.0.1 +--- + + + +# Example: EAST, Tesseract and Typing + +This example contains one image document which points to this image: + + + +In addition, there are three views, one created by EAST, one by Tesseract and one by a semantic typing component. We now give fragments of the four relevant parts of the MMIF file, each with some comments. + +To see the full example scroll down to the end or open the [raw json file](raw.json). 
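Before going through the fragments one by one, here is a minimal sketch of how an example file like this can be inspected with nothing but the standard library (the `mmif-python` SDK offers richer accessors); the `raw.json` filename assumes a local copy of the example.

```python
import json

def short_name(type_uri: str) -> str:
    """Drop the vocabulary prefix and a trailing version suffix such as '/v1', if present."""
    parts = type_uri.rstrip("/").split("/")
    if parts[-1].startswith("v") and parts[-1][1:].isdigit():
        return parts[-2]
    return parts[-1]

with open("raw.json") as f:   # a local copy of the example file
    mmif = json.load(f)

for view in mmif["views"]:
    types = sorted({short_name(a["@type"]) for a in view["annotations"]})
    print(f"{view['id']}: {view['metadata'].get('app', '?')} -> {', '.join(types) or 'no annotations'}")
```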
+ +### Fragment 1: the documents list + +```json +{ + "documents": [ + { + "@type": "http://mmif.clams.ai/vocabulary/ImageDocument/v1", + "properties": { + "id": "m1", + "mime": "image/jpg", + "location": "/var/archive/image-fido-barks.jpg" } + } + ] +} +``` +This is simply just a list with one *ImageDocument* which points at the file with the barking dog image. + +### Fragment 2: the EAST view + +Here are the metadata in this view: + +```json +{ + "app": "http://mmif.clams.ai/apps/east/0.2.1", + "contains": { + "http://mmif.clams.ai/1.0.1/BoundingBox": { + "timeUnit": "pixels", + "document": "m1" } } +} +``` + +It simply says that EAST created the view and that all bounding box annotations are over document "m1" using pixels as the unit. + +And here is the annotations list: + + +```json +[ + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb1", + "coordinates": [[10,20], [40,20], [10,30], [40,30]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb2", + "coordinates": [[210,220], [240,220], [210,230], [240,230]], + "boxType": "text" } + } +] +``` + +EAST has found two text boxes: one for "Arf" and one for "yelp" (although after EAST runs we do not know yet what the actual text is). Text boxes are encoded simply by specifying what the type of the bounding box is. For the sake of a somewhat smaller example file we are assuming here that EAST does not find text boxes when the text slants down. Note also that the coordinates are made up and bear little relation to what the real coordinates are. + +### Fragment 3: the Tesseract view + +Metadata: + +```json +{ + "app": "http://mmif.clams.ai/apps/tesseract/0.2.1", + "contains": { + "http://mmif.clams.ai/0.1.0/vocabulary/TextDocument": {}, + "http://mmif.clams.ai/0.1.0/vocabulary/Alignment": {} } +} +``` + +Tesseract creates text documents from bounding boxes with type equal to "text" and creates alignment relations between the documents and the boxes. The interesting thing here is compared to the metadata for the view created by EAST is that here no *document* metadata property is defined. This is because neither *TextDocument* nor *Alignment* need to be directly anchored into a document. + +Annotations list: + +```json +[ + { + "@type": "http://mmif.clams.ai/0.1.0/vocabulary/TextDocument", + "properties": { + "id": "td1", + "text": { + "@value": "Arf" } } + }, + { + "@type": "http://mmif.clams.ai/0.1.0/vocabulary/Alignment", + "properties": { + "id": "a1", + "source": "v1:bb1", + "target": "td1" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td2", + "text": { + "@value": "yelp" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a2", + "source": "v1:bb2", + "target": "td2" } + } +] +``` + +The text documents just have identifiers and store the text, they themselves are not aware of where they came from. The alignments link the text documents to bounding boxes in the view created by EAST. + +### Fragment 4: the Semantic Typer view + +Metadata: + +```json +{ + "app": "http://mmif.clams.ai/apps/semantic-typer/0.2.4", + "contains": { + "http://mmif.clams.ai/vocabulary/SemanticTag/v1": {} }, +} + +``` + +Nothing spectacular here. Like the previous view no *document* property is used, but in this case it is because the semantic tags in the annotation list each refer to a different document. 
+ +Annotations list: + +```json +[ + { + "@type": "http://mmif.clams.ai/vocabulary/SemanticTag/v1", + "properties": { + "id": "st1", + "category": "dog-sound", + "document": "V2:td1", + "start": 0, + "end": 4 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/SemanticTag/v1", + "properties": { + "id": "st2", + "category": "dog-sound", + "document": "V2:td2", + "start": 0, + "end": 4 } + } +] +``` + +Now each annotation needs to have its own *document* property so we know what documents each semantic tag is anchored to. + + + +## Full MMIF File + +```json +{% include_relative raw.json %} +``` + diff --git a/docs/1.0.1/samples/east-tesseract-typing/raw.json b/docs/1.0.1/samples/east-tesseract-typing/raw.json new file mode 100644 index 00000000..fc74fbac --- /dev/null +++ b/docs/1.0.1/samples/east-tesseract-typing/raw.json @@ -0,0 +1,124 @@ +{ + "metadata": { + "mmif": "http://mmif.clams.ai/1.0.1" + }, + "documents": [ + { + "@type": "http://mmif.clams.ai/vocabulary/ImageDocument/v1", + "properties": { + "id": "m1", + "mime": "image/jpeg", + "location": "file:///var/archive/image-fido-barks.jpg" + } + } + ], + "views": [ + { + "id": "v1", + "metadata": { + "app": "http://mmif.clams.ai/apps/east/0.2.1", + "contains": { + "http://mmif.clams.ai/vocabulary/BoundingBox/v1": { + "timeUnit": "pixels", + "document": "m1" + } + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb1", + "coordinates": [ [10, 20], [40, 20], [10, 30], [40, 30] ], + "boxType": "text" + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb2", + "coordinates": [ [210, 220], [240, 220], [210, 230], [240, 230] ], + "boxType": "text" + } + } + ] + }, + { + "id": "v2", + "metadata": { + "app": "http://mmif.clams.ai/apps/tesseract/0.2.1", + "contains": { + "http://mmif.clams.ai/0.1.0/TextDocument": {}, + "http://mmif.clams.ai/0.1.0/Alignment": {} + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td1", + "text": { + "@value": "Arf" + } + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a1", + "source": "v1:bb1", + "target": "td1" + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td2", + "text": { + "@value": "yelp" + } + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a2", + "source": "v1:bb2", + "target": "td2" + } + } + ] + }, + { + "id": "v3", + "metadata": { + "app": "http://mmif.clams.ai/apps/semantic-typer/0.2.4", + "contains": { + "http://mmif.clams.ai/vocabulary/SemanticTag/v1": {} + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/SemanticTag/v1", + "properties": { + "id": "st1", + "category": "dog-sound", + "document": "V2:td1", + "start": 0, + "end": 4 + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/SemanticTag/v1", + "properties": { + "id": "st2", + "category": "dog-sound", + "document": "V2:td2", + "start": 0, + "end": 4 + } + } + ] + } + ] +} diff --git a/docs/1.0.1/samples/everything/images/newshour-loud-dogs.jpg b/docs/1.0.1/samples/everything/images/newshour-loud-dogs.jpg new file mode 100644 index 00000000..6c7c3cad Binary files /dev/null and b/docs/1.0.1/samples/everything/images/newshour-loud-dogs.jpg differ diff --git a/docs/1.0.1/samples/everything/index.md b/docs/1.0.1/samples/everything/index.md new file mode 100644 index 00000000..c784c1e4 
--- /dev/null +++ b/docs/1.0.1/samples/everything/index.md @@ -0,0 +1,236 @@ +--- +layout: page +title: MMIF Specification +subtitle: Version 1.0.1 +--- + +# Example: Everything and the kitchen sink + +To see the full example scroll down to the end or open the [raw json file](raw.json). + +This is an example with a bunch of different annotations created by a variety of tools. For the input we have a short totally made up video which starts with some bars-and-tone and a simple slate. Those are followed by about a dozen seconds of a talking head followed by an image of a barking dog. + + + +The timeline includes markers for seconds. In the views below all anchors will be using milliseconds. + +We apply the following processing tools: + +1. Bars-and-tone extraction +1. Slate extraction +1. Audio segmentation +1. Kaldi speech recognition and alignment +1. EAST text box recognition +1. Tesseract OCR +1. Named entity recognition +1. Slate parsing + +Following now are short explanations of some frgaments of the full MMIF file, some application output was explained in more detail in other examples, refer to those for more details. + +### Extracting time frames + +The first three steps are straightforward and all result in views with time frame annotations (views with id=v1, id=v2 and id=v3). The bars-and-tone and slate extraction applications each find one time frame and the audio segmenter finds two segments with the second one being a speech time frame that starts at about 5500ms from the start. + +```json +{ + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf2", + "frameType": "speech", + "start": 5500, + "end": 22000 } +} +``` + +This time frame will provide the input to Kaldi. + +### Kaldi speech recognition + +Kaldi creates one view (with id=v4) which has + +- a text document +- an alignment of that document with the speech time frame from the segmenter +- a list of tokens for the document +- a list of time frames corresponding to each token +- a list of alignments between the tokens and the time frames + +In the metadata it spells out that the offsets of all tokens are taken to be offsets in "td1", which is a text document in the same view. We can do this instead of the alternative (using the *document* property on all tokens) because all tokens are for the same text document. + +```json +{ + "app": "http://mmif.clams.ai/apps/kaldi/0.2.1", + "contains": { + "http://mmif.clams.ai/vocabulary/TextDocument/v1": {}, + "http://vocab.lappsgrid.org/Token": { + "document": "td1" }, + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "timeUnit": "milliseconds", + "document": "m1" }, + "http://mmif.clams.ai/vocabulary/Alignment/v1": {} + } +} +``` + +Note that a text document can refer to its text by either using the *text* property which contains the text verbatim or by referring to an external file using the *location* property, here we use the second approach: + +```json +{ + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td1", + "mime": "text/plain", + "location": "/var/processed/transcript-002.txt" } +} +``` + +For the sake of argument we assume perfect speech recognition, and the content of the external file is as follows. + +> Hello, this is Jim Lehrer with the NewsHour on PBS. In the nineteen eighties, barking dogs have increasingly become a problem in urban areas. + +This text is aligned with the second time frame from the segmenter. 
+ +```json +{ + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a1", + "source": "v3:tf2", + "target": "td1" } +} +``` + +See the full example below for all the tokens, time frames for each token and the alignment between the token and the time frame. + +### EAST and Tesseract + +EAST adds bounding boxes anchored to the video document with id=m1: + +```json +{ + "app": "http://mmif.clams.ai/apps/east/0.2.1", + "contains": { + "http://mmif.clams.ai/1.0.1/BoundingBox": { "document": "m1" } +} +``` + +Let's assume that EAST runs on frames sampled from the video at 1 second intervals. For our example that means that EAST finds boxes at time offsets 3, 4, 5 and 21 seconds. Let's assume decent performance where EAST finds all the boxes in the slate and just the caption in the image (but not the barking sounds). Here is one example box annotation: + +```json +{ + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb9", + "timePoint": 4000, + "coordinates": [[180, 110], [460, 110], [180, 170], [460, 170]], + "boxType": "text" } +} +``` + +Due to the nature of the input many of the bounding boxes will have identical or near-identical coordinates. For example, there are two more bounding boxes with the coordinates above, one for the box with time offset 3000 and one for the box with time offset 5000. + +Tesseract now runs on all those boxes and creates a text document for each of them. In doing so, it will add these to a new view: +* text documents from each text box +* alignment of that documents to their originating boxes + +Thus, the metadata of the new view would be: + + +```json +{ + "app": "http://mmif.clams.ai/apps/tesseract/0.4.4", + "contains": { + "http://mmif.clams.ai/vocabulary/TextDocument/v1": {}, + "http://mmif.clams.ai/vocabulary/Alignment/v1": { + "sourceType": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "targetType": "http://mmif.clams.ai/vocabulary/BoundingBox/v1" + } + } +} +``` + +Unlike the alignment annotations in the Kaldi view, Tesseract specifies types of both ends of the alignments in the `contains` metadata. This is only allowed because all alignment annotations in the view have the same source type and target types. This information can help, for example, machines search for certain alignments more quickly. +{: .box-note} + +Now the recognition results are recorded as text documents, here's one: + +```json +{ + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td1", + "text": { "@value": "DATE" } } +} +``` + +And here is the corresponding alignment from the bounding box to the text document: + +```json +{ + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a1", + "source": "v5:bb1", + "target": "td1" } +} +``` + +The source is in another view, hence the prefix on the identifier. + +### Named entity recognition + +After Kaldi and Tesseract have added text documents we now have all text extracted from audiovisual elements and we can run NLP tools like named entity recognizers over them. 
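Finding those text documents is mostly a matter of walking the views; the sketch below does this with plain dictionaries and is only an illustration (the `mmif-python` SDK has dedicated helpers for this).

```python
TEXT_DOCUMENT = "http://mmif.clams.ai/vocabulary/TextDocument/v1"

def text_documents(mmif: dict) -> dict:
    """Map document references ('docid' or 'viewid:annotationid') to TextDocument objects."""
    found = {d["properties"]["id"]: d
             for d in mmif["documents"] if d["@type"] == TEXT_DOCUMENT}
    for view in mmif["views"]:
        for ann in view["annotations"]:
            if ann["@type"] == TEXT_DOCUMENT:
                found[f"{view['id']}:{ann['properties']['id']}"] = ann
    return found

# Each entry is a candidate input for an NLP tool; its key is the reference that
# downstream annotations can use in their 'document' property.
```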
Each entity annotation refers to a text document, either the one in the Kaldi view or one of the documents in the Tesseract view; this example refers to one of the documents in the Tesseract view:

```json
{
  "@type": "http://vocab.lappsgrid.org/NamedEntity",
  "properties": {
    "id": "ne1",
    "document": "v6:td2",
    "start": 0,
    "end": 10,
    "category": "Date",
    "text": "1982-05-12" }
}
```

Note that since there were three text boxes with the date and therefore three documents with the actual text, there are also three named entities for this date.

### Slate parsing

This section is somewhat speculative since we have not yet made any decisions on what the output of a slate parser will look like.

Slate parsing applies to frames in the slate segment found in the slate view (id=v2) and uses several kinds of information obtained from two or three other views:

- The EAST view has text bounding boxes with coordinates for all those boxes.
- The Tesseract view has the text values for all those boxes.
- The NER view has named entity classes for some of those text values, which may in some cases be useful for slate parsing.

A minimal option for the slate parser is to create a particular semantic tag that describes value fields in a slate. For that it may use the category of the named entity that is anchored to the field or the text in an adjacent field. For example, if we have the text "1982-05-12" and we know it was tagged as a *Date* then this may indicate that that value is the air time of the video. Similarly, if that value occurs next to a text that has the text "DATE" in it we may also derive that the value was a *Date*.

Here is the tag annotation on the same document as the named entity annotation above:

```json
{
  "@type": "http://vocab.lappsgrid.org/SemanticTag",
  "properties": {
    "id": "st1",
    "document": "v6:td2",
    "start": 0,
    "end": 10,
    "tagName": "Date",
    "text": "1982-05-12" }
}
```

Note that the *tagName* property has the same value as the *category* property on the named entity. This is a coincidence in that there is a named entity category *Date* as well as a slate category *Date*.

Similar to what we saw for the named entities, there will be multiple versions of this date tag due to multiple text boxes with the same text.


## Full MMIF File

```json
{% include_relative raw.json %}
```

diff --git a/docs/1.0.1/samples/everything/pbcore.md b/docs/1.0.1/samples/everything/pbcore.md new file mode 100644 index 00000000..9fa246e4 --- /dev/null +++ b/docs/1.0.1/samples/everything/pbcore.md @@ -0,0 +1,144 @@

# MMIF and PBCore

Some notes on the mappings of elements from the MMIF file in the "everything and the kitchen sink" example to PBCore (see [index.md](index) and [raw.json](raw.json)).

The relevant information that we have in MMIF is in the following types (a sketch of collecting them follows the list):

1. Instances of *TimeFrame* with frameType "bars-and-tone" or "slate". These directly refer back to time slices in the video.
2. Instances of *SemanticTag* with tagName "Date", "Title", "Host" or "Producer". These can be traced back to the part of the video where the information was obtained (that is, the location of the slate), but this is not needed here because it is not required by PBCore (or even allowed in the PBCore elements that we would be using).
3. Instances of *NamedEntity* with category "Person", "Location" or "Organization". These do need to be traced back because we want to index on the locations in the video where a subject occurs.
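A minimal sketch of that collection step, assuming plain-dictionary access and the type URIs used in this example (they may differ in other MMIF files):

```python
import collections

# Type URIs as used in the example; treat them as assumptions, not a fixed list.
RELEVANT = {
    "http://mmif.clams.ai/vocabulary/TimeFrame/v2": "TimeFrame",
    "http://vocab.lappsgrid.org/SemanticTag": "SemanticTag",
    "http://vocab.lappsgrid.org/NamedEntity": "NamedEntity",
}

def pbcore_relevant(mmif: dict) -> dict:
    """Group the annotations that feed the PBCore mapping by short type name."""
    grouped = collections.defaultdict(list)
    for view in mmif["views"]:
        for ann in view["annotations"]:
            name = RELEVANT.get(ann["@type"])
            if name is not None:
                grouped[name].append((view["id"], ann["properties"]))
    return grouped
```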

Tracing back to the source location requires some processing because, while the information is available in the MMIF file, it is not explicitly stated in the *NamedEntity* annotation.

A note on collaboration on this. The CLAMS team could do one of the following:

1. Provide code and an API that make it easy to get the needed information from a MMIF file.
2. Create code that extracts the needed information from a MMIF file and outputs it in some kind of generic format.
3. Create code that extracts the information and creates PBCore output.

Most of the work is in item 1 and that would be the minimal thing to do for CLAMS, but this document assumes for now that the CLAMS team also creates PBCore output.

### Mappings from MMIF to PBCore

The PBCore to be created has a top-level *pbcoreDescriptionDocument* element:

```xml
<pbcoreDescriptionDocument>
</pbcoreDescriptionDocument>
```

Within this top-level element we may add the following sub elements: *pbcoreAssetDate*, *pbcoreTitle*, *pbcoreContributor*, *pbcoreSubject*, *pbcoreAnnotation* and *pbcoreDescription*. The examples below for the MMIF example file [raw.json](raw.json) are based on the descriptions in [http://pbcore.org/elements](http://pbcore.org/elements) and feedback from Kevin.

To map the MMIF time frames we need an element that allows us to express the type and the start and end times. The only one I can see that is not obviously intended for other uses is *pbcoreDescription* (in an earlier version of this document I used *pbcorePart*, which is really not appropriate).

```xml
<pbcoreDescription descriptionType="bars-and-tones" start="0" end="2600"/>
<pbcoreDescription descriptionType="slate" start="2700" end="5300"/>
```

Instead of *descriptionType* it may be more appropriate to use *segmentType*, but from the descriptions given in [http://pbcore.org/elements/pbcoredescription](http://pbcore.org/elements/pbcoredescription) it is not really clear to me which one is best. Another question I have is whether the attribute values for start and end can be milliseconds from the beginning of the video.

It was suggested that as an alternative we could use *instantiationTimeStart*, which can be repeated:

```xml
<instantiationTimeStart>0</instantiationTimeStart>
<instantiationTimeStart>2600</instantiationTimeStart>
<instantiationTimeStart>2700</instantiationTimeStart>
<instantiationTimeStart>5300</instantiationTimeStart>
```

This looks rather forced to me and would also require using a *pbcoreInstantiationDocument* top-level tag I think, so I will for now dismiss this summarily.

The semantic tags in MMIF have direct and unproblematic mappings to PBCore elements:

- Date → pbcoreAssetDate
- Title → pbcoreTitle
- Host → pbcoreContributor
- Producer → pbcoreContributor

```xml
<pbcoreAssetDate>1982-05-12</pbcoreAssetDate>
```

```xml
<pbcoreTitle>Loud Dogs</pbcoreTitle>
```

```xml
<pbcoreContributor>
  <contributor>Jim Lehrer</contributor>
  <contributorRole>Host</contributorRole>
</pbcoreContributor>
```

```xml
<pbcoreContributor>
  <contributor>Sara Just</contributor>
  <contributorRole>Producer</contributorRole>
</pbcoreContributor>
```

For the named entities we can use *pbcoreSubject*:

```xml
<pbcoreSubject subjectType="Person" start="7255" end="8425">Jim Lehrer</pbcoreSubject>
```

```xml
<pbcoreSubject subjectType="Organization" start="10999" end="11350">PBS</pbcoreSubject>
```

```xml
<pbcoreSubject subjectType="Location" start="21000" end="21000">New York</pbcoreSubject>
```

I am not sure how to spin the attributes, so this here is my best guesstimate. Note that "Jim Lehrer" shows up both as a contributor and as a subject: the former because he was mentioned in the slate and the latter because his name was used in the transcript.

The subject type is the entity category for all of these, and a *ref* attribute could be added to refer to some external authoritative source.

Start and end time are in milliseconds. For the first two they are generated by finding the tokens in the transcript text documents (by comparing start and end character offsets) and then tracking those to the time frames that they are aligned with.

For the third, we know the named entity occurs in some text document (created by Tesseract) and we track that document to the bounding box generated by EAST that the document is aligned with.
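
In code, that second kind of look-up amounts to following one Alignment annotation; below is a minimal sketch over the raw JSON (again not the MMIF SDK, and hard-coding the direction used in this example, where the bounding box is the source of the alignment and the text document its target):

```python
import json

def get_view(mmif, view_id):
    return next(v for v in mmif["views"] if v["id"] == view_id)

def get_annotation(view, annotation_id):
    return next(a for a in view["annotations"] if a["properties"]["id"] == annotation_id)

def bounding_box_for(mmif, document_ref):
    # document_ref is a view-prefixed identifier such as "v6:td25"; the Alignment
    # that points at this document lives in the same view as the document itself.
    view_id, doc_id = document_ref.split(":")
    view = get_view(mmif, view_id)
    for annotation in view["annotations"]:
        if "/Alignment/" in annotation["@type"] and annotation["properties"]["target"] == doc_id:
            box_view_id, box_id = annotation["properties"]["source"].split(":")  # e.g. "v5:bb25"
            return get_annotation(get_view(mmif, box_view_id), box_id)

with open("raw.json") as fh:
    mmif = json.load(fh)
box = bounding_box_for(mmif, "v6:td25")   # the document with "Dog in New York"
print(box["properties"]["timePoint"])     # -> 21000
```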
That bounding box has a *timePoint* attribute that is used for both start and end time. Note that if there had be a second text box for the "Dog in New York" text (that is, if the time the image was displayed on screen was a little bit longer) then that box would have its own time point and the end time for "New York" would have been 22000. + +It is possible that there may be many instantiations of *pbcoreSubject*, for example for a common entity like Boston. There is some unease on having multiple elements for a single named entity, but it is not clear what to do about it (use other element? only have one instance?). For now, we will dump all entities in PBCore subject elements and see how that pans out. In general, it seems fairly easy to export relevant information from MMIF into PBCore without loss of information, and what is exported and what the exact landing spots are going to be can be driven by PBCore-specific reasons. + +Finally, here is all the above in one XML file, adding some identifier that we get from the input: + +```xml + + + 1982-05-12 + + SOME_ID + + Loud Dogs + + Jim Lehrer + + PBS + + New York + + + + + + + Jim Lehrer + Host + + + + Sara Just + Producer + + + +``` + diff --git a/docs/1.0.1/samples/everything/raw.json b/docs/1.0.1/samples/everything/raw.json new file mode 100644 index 00000000..85566f07 --- /dev/null +++ b/docs/1.0.1/samples/everything/raw.json @@ -0,0 +1,1563 @@ +{ + + "metadata": { + "mmif": "http://mmif.clams.ai/1.0.1" + }, + + "documents": [ + { + "@type": "http://mmif.clams.ai/vocabulary/VideoDocument/v1", + "properties": { + "id": "m1", + "mime": "video/mpeg", + "location": "file:///var/archive/video-002.mp4" } + } + ], + + "views": [ + + { + "id": "v1", + "metadata": { + "app": "http://apps.clams.ai/bars-and-tones/1.0.5", + "timestamp": "2020-05-27T12:23:45", + "contains": { + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "document": "m1", + "timeUnit": "milliseconds" } + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "s1", + "start": 0, + "end": 2600, + "frameType": "bars-and-tones" } + } + ] + }, + + { + "id": "v2", + "metadata": { + "app": "http://apps.clams.ai/slates/1.0.3", + "timestamp": "2020-05-27T12:23:45", + "contains": { + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "document": "m1", + "timeUnit": "milliseconds" } + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "s1", + "start": 2700, + "end": 5300, + "frameType": "slate" } + } + ] + }, + + { + "id": "v3", + "metadata": { + "app": "http://mmif.clams.ai/apps/audio-segmenter/0.2.1", + "contains": { + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "timeUnit": "milliseconds", + "document": "m1" } + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "frameType": "non-speech", + "id": "tf1", + "start": 0, + "end": 5500 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf2", + "frameType": "speech", + "start": 5500, + "end": 22000 } + } + ] + }, + + { + "id": "v4", + "metadata": { + "app": "http://mmif.clams.ai/apps/kaldi/0.2.1", + "contains": { + "http://mmif.clams.ai/vocabulary/TextDocument/v1": {}, + "http://vocab.lappsgrid.org/Token": { + "document": "td1" }, + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "timeUnit": "milliseconds", + "document": "m1" }, + "http://mmif.clams.ai/vocabulary/Alignment/v1": {} + } + }, + 
"annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td1", + "mime": "text/plain", + "location": "file:///var/archive/transcript-002.txt" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a1", + "source": "v3:tf1", + "target": "td1" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t1", + "start": 0, + "end": 5, + "text": "Hello" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf1", + "start": 5500, + "end": 6085 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a2", + "source": "tf1", + "target": "t1" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t2", + "start": 5, + "end": 6, + "text": "," } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf2", + "start": 6085, + "end": 6202 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a3", + "source": "tf2", + "target": "t2" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t3", + "start": 7, + "end": 11, + "text": "this" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf3", + "start": 6319, + "end": 6787 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a4", + "source": "tf3", + "target": "t3" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t4", + "start": 12, + "end": 14, + "text": "is" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf4", + "start": 6904, + "end": 7138 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a5", + "source": "tf4", + "target": "t4" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t5", + "start": 15, + "end": 18, + "text": "Jim" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf5", + "start": 7255, + "end": 7606 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a6", + "source": "tf5", + "target": "t5" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t6", + "start": 19, + "end": 25, + "text": "Lehrer" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf6", + "start": 7723, + "end": 8425 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a7", + "source": "tf6", + "target": "t6" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t7", + "start": 26, + "end": 30, + "text": "with" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf7", + "start": 8542, + "end": 9010 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a8", + "source": "tf7", + "target": "t7" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t8", + "start": 31, + "end": 34, + "text": "the" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf8", + "start": 9127, + "end": 9478 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a9", + 
"source": "tf8", + "target": "t8" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t9", + "start": 35, + "end": 43, + "text": "NewsHour" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf9", + "start": 9595, + "end": 10531 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a10", + "source": "tf9", + "target": "t9" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t10", + "start": 44, + "end": 46, + "text": "on" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf10", + "start": 10648, + "end": 10882 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a11", + "source": "tf10", + "target": "t10" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t11", + "start": 47, + "end": 50, + "text": "PBS" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf11", + "start": 10999, + "end": 11350 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a12", + "source": "tf11", + "target": "t11" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t12", + "start": 50, + "end": 51, + "text": "." } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf12", + "start": 11350, + "end": 11467 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a13", + "source": "tf12", + "target": "t12" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t13", + "start": 52, + "end": 54, + "text": "In" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf13", + "start": 11584, + "end": 11818 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a14", + "source": "tf13", + "target": "t13" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t14", + "start": 55, + "end": 58, + "text": "the" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf14", + "start": 11935, + "end": 12286 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a15", + "source": "tf14", + "target": "t14" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t15", + "start": 59, + "end": 67, + "text": "nineteen" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf15", + "start": 12403, + "end": 13339 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a16", + "source": "tf15", + "target": "t15" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t16", + "start": 68, + "end": 76, + "text": "eighties" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf16", + "start": 13456, + "end": 14392 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a17", + "source": "tf16", + "target": "t16" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t17", + "start": 76, + "end": 77, + "text": "," } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + 
"properties": { + "id": "tf17", + "start": 14392, + "end": 14509 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a18", + "source": "tf17", + "target": "t17" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t18", + "start": 78, + "end": 85, + "text": "barking" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf18", + "start": 14626, + "end": 15445 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a19", + "source": "tf18", + "target": "t18" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t19", + "start": 86, + "end": 90, + "text": "dogs" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf19", + "start": 15562, + "end": 16030 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a20", + "source": "tf19", + "target": "t19" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t20", + "start": 91, + "end": 95, + "text": "have" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf20", + "start": 16147, + "end": 16615 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a21", + "source": "tf20", + "target": "t20" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t21", + "start": 96, + "end": 108, + "text": "increasingly" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf21", + "start": 16732, + "end": 18136 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a22", + "source": "tf21", + "target": "t21" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t22", + "start": 109, + "end": 115, + "text": "become" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf22", + "start": 18253, + "end": 18955 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a23", + "source": "tf22", + "target": "t22" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t23", + "start": 116, + "end": 117, + "text": "a" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf23", + "start": 19072, + "end": 19189 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a24", + "source": "tf23", + "target": "t23" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t24", + "start": 118, + "end": 125, + "text": "problem" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf24", + "start": 19306, + "end": 20125 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a25", + "source": "tf24", + "target": "t24" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t25", + "start": 126, + "end": 128, + "text": "in" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf25", + "start": 20242, + "end": 20476 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a26", + "source": "tf25", + "target": "t25" } + }, + { + 
"@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t26", + "start": 129, + "end": 134, + "text": "urban" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf26", + "start": 20593, + "end": 21178 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a27", + "source": "tf26", + "target": "t26" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t27", + "start": 135, + "end": 140, + "text": "areas" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf27", + "start": 21295, + "end": 21880 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a28", + "source": "tf27", + "target": "t27" } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t28", + "start": 140, + "end": 141, + "text": "." } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf28", + "start": 21880, + "end": 21997 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a29", + "source": "tf28", + "target": "t28" } + } + ] + }, + + { + "id": "v5", + "metadata": { + "app": "http://mmif.clams.ai/apps/east/0.2.1", + "contains": { + "http://mmif.clams.ai/vocabulary/BoundingBox/v1": { + "document": "m1" } + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb1", + "timePoint": 3000, + "coordinates": [[180, 110], [460, 110], [180, 170], [460, 170]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb2", + "timePoint": 3000, + "coordinates": [[660, 110], [1250, 110], [660, 170], [1250, 170]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb3", + "timePoint": 3000, + "coordinates": [[180, 320], [460, 320], [180, 380], [460, 380]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb4", + "timePoint": 3000, + "coordinates": [[660, 320], [1210, 320], [660, 380], [1210, 380]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb5", + "timePoint": 3000, + "coordinates": [[180, 520], [460, 520], [180, 580], [460, 580]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb6", + "timePoint": 3000, + "coordinates": [[660, 520], [1200, 520], [660, 580], [1200, 580]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb7", + "timePoint": 3000, + "coordinates": [[180, 750], [470, 750], [180, 810], [470, 810]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb8", + "timePoint": 3000, + "coordinates": [[660, 750], [1180, 750], [660, 810], [1180, 810]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb9", + "timePoint": 4000, + "coordinates": [[180, 110], [460, 110], [180, 170], [460, 170]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb10", + "timePoint": 4000, + "coordinates": [[660, 110], [1250, 110], [660, 170], 
[1250, 170]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb11", + "timePoint": 4000, + "coordinates": [[180, 320], [460, 320], [180, 380], [460, 380]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb12", + "timePoint": 4000, + "coordinates": [[660, 320], [1210, 320], [660, 380], [1210, 380]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb13", + "timePoint": 4000, + "coordinates": [[180, 520], [460, 520], [180, 580], [460, 580]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb14", + "timePoint": 4000, + "coordinates": [[660, 520], [1200, 520], [660, 580], [1200, 580]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb15", + "timePoint": 4000, + "coordinates": [[180, 750], [470, 750], [180, 810], [470, 810]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb16", + "timePoint": 4000, + "coordinates": [[660, 750], [1180, 750], [660, 810], [1180, 810]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb17", + "timePoint": 5000, + "coordinates": [[180, 110], [460, 110], [180, 170], [460, 170]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb18", + "timePoint": 5000, + "coordinates": [[660, 110], [1250, 110], [660, 170], [1250, 170]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb19", + "timePoint": 5000, + "coordinates": [[180, 320], [460, 320], [180, 380], [460, 380]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb20", + "timePoint": 5000, + "coordinates": [[660, 320], [1210, 320], [660, 380], [1210, 380]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb21", + "timePoint": 5000, + "coordinates": [[180, 520], [460, 520], [180, 580], [460, 580]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb22", + "timePoint": 5000, + "coordinates": [[660, 520], [1200, 520], [660, 580], [1200, 580]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb23", + "timePoint": 5000, + "coordinates": [[180, 750], [470, 750], [180, 810], [470, 810]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb24", + "timePoint": 5000, + "coordinates": [[660, 750], [1180, 750], [660, 810], [1180, 810]], + "boxType": "text" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/BoundingBox/v1", + "properties": { + "id": "bb25", + "timePoint": 21000, + "coordinates": [[150, 810], [1120, 810], [150, 870], [1120, 870]], + "boxType": "text" } + } + ] + }, + + { + "id": "v6", + "metadata": { + "app": "http://mmif.clams.ai/apps/tesseract/0.2.1", + "contains": { + "http://mmif.clams.ai/vocabulary/TextDocument/v1": {}, + "http://mmif.clams.ai/vocabulary/Alignment/v1": { + "sourceType": 
"http://mmif.clams.ai/vocabulary/TextDocument/v1", + "targetType": "http://mmif.clams.ai/vocabulary/BoundingBox/v1" + } + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td1", + "text": { "@value": "DATE" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a1", + "source": "v5:bb1", + "target": "td1" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td2", + "text": { "@value": "1982-05-12" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a2", + "source": "v5:bb2", + "target": "td2" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td3", + "text": { "@value": "TITLE" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a3", + "source": "v5:bb3", + "target": "td3" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td4", + "text": { "@value": "Loud Dogs" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a4", + "source": "v5:bb4", + "target": "td4" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td5", + "text": { "@value": "HOST" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a5", + "source": "v5:bb5", + "target": "td5" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td6", + "text": { "@value": "Jim Lehrer" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a6", + "source": "v5:bb6", + "target": "td6" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td7", + "text": { "@value": "PROD" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a7", + "source": "v5:bb7", + "target": "td7" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td8", + "text": { "@value": "Sara Just" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a8", + "source": "v5:bb8", + "target": "td8" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td9", + "text": { "@value": "DATE" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a9", + "source": "v5:bb9", + "target": "td9" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td10", + "text": { "@value": "1982-05-12" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a10", + "source": "v5:bb10", + "target": "td10" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td11", + "text": { "@value": "TITLE" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a11", + "source": "v5:bb11", + "target": "td11" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td12", + "text": { "@value": "Loud Dogs" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a12", + "source": "v5:bb12", + "target": "td12" } + }, + { + 
"@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td13", + "text": { "@value": "HOST" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a13", + "source": "v5:bb13", + "target": "td13" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td14", + "text": { "@value": "Jim Lehrer" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a14", + "source": "v5:bb14", + "target": "td14" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td15", + "text": { "@value": "PROD" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a15", + "source": "v5:bb15", + "target": "td15" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td16", + "text": { "@value": "Sara Just" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a16", + "source": "v5:bb16", + "target": "td16" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td17", + "text": { "@value": "DATE" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a17", + "source": "v5:bb17", + "target": "td17" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td18", + "text": { "@value": "1982-05-12" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a18", + "source": "v5:bb18", + "target": "td18" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td19", + "text": { "@value": "TITLE" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a19", + "source": "v5:bb19", + "target": "td19" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td20", + "text": { "@value": "Loud Dogs" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a20", + "source": "v5:bb20", + "target": "td20" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td21", + "text": { "@value": "HOST" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a21", + "source": "v5:bb21", + "target": "td21" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td22", + "text": { "@value": "Jim Lehrer" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a22", + "source": "v5:bb22", + "target": "td22" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td23", + "text": { "@value": "PROD" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a23", + "source": "v5:bb23", + "target": "td23" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td24", + "text": { "@value": "Sara Just" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a24", + "source": "v5:bb24", + "target": "td24" } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td25", + "text": { "@value": "Dog in 
New York" } } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a25", + "source": "v5:bb25", + "target": "td25" } + } + ] + }, + + { + "id": "v7", + "metadata": { + "app": "http://apps.clams.ai/spacy-ner/0.2.1", + "contains": { + "http://vocab.lappsgrid.org/NamedEntity": {} + } + }, + "annotations": [ + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne1", + "document": "v6:td2", + "start": 0, + "end": 10, + "category": "Date", + "text": "1982-05-12" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne2", + "document": "v6:td6", + "start": 0, + "end": 10, + "category": "Person", + "text": "Jim Lehrer" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne3", + "document": "v6:td8", + "start": 0, + "end": 9, + "category": "Person", + "text": "Sara Just" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne4", + "document": "v6:td10", + "start": 0, + "end": 10, + "category": "Date", + "text": "1982-05-12" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne5", + "document": "v6:td14", + "start": 0, + "end": 10, + "category": "Person", + "text": "Jim Lehrer" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne6", + "document": "v6:td16", + "start": 0, + "end": 9, + "category": "Person", + "text": "Sara Just" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne7", + "document": "v6:td18", + "start": 0, + "end": 10, + "category": "Date", + "text": "1982-05-12" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne8", + "document": "v6:td22", + "start": 0, + "end": 10, + "category": "Person", + "text": "Jim Lehrer" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne9", + "document": "v6:td24", + "start": 0, + "end": 9, + "category": "Person", + "text": "Sara Just" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne10", + "document": "v6:td25", + "start": 7, + "end": 15, + "category": "Location", + "text": "New York" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne11", + "document": "v4:td1", + "start": 15, + "end": 25, + "category": "Person", + "text": "Jim Lehrer" } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne12", + "document": "v4:td1", + "start": 47, + "end": 50, + "category": "Organization", + "text": "PBS" } + } + ] + }, + + { + "id": "v8", + "metadata": { + "app": "http://apps.clams.ai/slate-parser/1.0.2", + "timestamp": "2020-05-27T12:23:45", + "contains": { + "http://vocab.lappsgrid.org/SemanticTag": {} + } + }, + "annotations": [ + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st1", + "document": "v6:td2", + "start": 0, + "end": 10, + "tagName": "Date", + "text": "1982-05-12" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st2", + "document": "v6:td4", + "start": 0, + "end": 9, + "tagName": "Title", + "text": "Loud Dogs" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st3", + "document": "v6:td6", + "start": 0, + "end": 10, + "tagName": "Host", + "text": "Jim Lehrer" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + 
"properties": { + "id": "st4", + "document": "v6:td8", + "start": 0, + "end": 9, + "tagName": "Producer", + "text": "Sara Just" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st5", + "document": "v6:td10", + "start": 0, + "end": 10, + "tagName": "Date", + "text": "1982-05-12" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st6", + "document": "v6:td12", + "start": 0, + "end": 9, + "tagName": "Title", + "text": "Loud Dogs" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st7", + "document": "v6:td14", + "start": 0, + "end": 10, + "tagName": "Host", + "text": "Jim Lehrer" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st8", + "document": "v6:td16", + "start": 0, + "end": 9, + "tagName": "Producer", + "text": "Sara Just" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st9", + "document": "v6:td18", + "start": 0, + "end": 10, + "tagName": "Date", + "text": "1982-05-12" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st10", + "document": "v6:td20", + "start": 0, + "end": 9, + "tagName": "Title", + "text": "Loud Dogs" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st11", + "document": "v6:td22", + "start": 0, + "end": 10, + "tagName": "Host", + "text": "Jim Lehrer" } + }, + { + "@type": "http://vocab.lappsgrid.org/SemanticTag", + "properties": { + "id": "st12", + "document": "v6:td24", + "start": 0, + "end": 9, + "tagName": "Producer", + "text": "Sara Just" } + } + ] + } + + ] +} diff --git a/docs/1.0.1/samples/everything/scripts/east.py b/docs/1.0.1/samples/everything/scripts/east.py new file mode 100644 index 00000000..67af89ae --- /dev/null +++ b/docs/1.0.1/samples/everything/scripts/east.py @@ -0,0 +1,53 @@ +"""east.py + +Utility script to create the bounding boxes of the EAST view in the example. 
+ +""" + +from utils import print_annotation + + +slate_box_coordinates = [ + + [[180, 110], [460, 110], [180, 170], [460, 170]], # DATE + [[660, 110], [1250, 110], [660, 170], [1250, 170]], + + [[180, 320], [460, 320], [180, 380], [460, 380]], # TITLE + [[660, 320], [1210, 320], [660, 380], [1210, 380]], + + [[180, 520], [460, 520], [180, 580], [460, 580]], # HOST + [[660, 520], [1200, 520], [660, 580], [1200, 580]], + + [[180, 750], [470, 750], [180, 810], [470, 810]], # PROP + [[660, 750], [1180, 750], [660, 810], [1180, 810]] + +] + +fido_box_coordinates = [[150, 810], [1120, 810], [150, 870], [1120, 870]] + + + + + +if __name__ == '__main__': + + count = 0 + for time_offset in 3000, 4000, 5000: + for coordinates in slate_box_coordinates: + count += 1 + box_id = 'bb%s' % count + print_annotation( + "http://mmif.clams.ai/0.2.0/vocabulary/BoundingBox", + [('id', box_id), + ('timePoint', time_offset), + ('coordinates', coordinates), + ('boxType', 'text')]) + + count += 1 + box_id = 'bb%s' % count + print_annotation( + "http://mmif.clams.ai/0.2.0/vocabulary/BoundingBox", + [('id', box_id), + ('timePoint', 21000), + ('coordinates', fido_box_coordinates), + ('boxType', 'text')]) diff --git a/docs/1.0.1/samples/everything/scripts/kaldi.py b/docs/1.0.1/samples/everything/scripts/kaldi.py new file mode 100644 index 00000000..8ed4a062 --- /dev/null +++ b/docs/1.0.1/samples/everything/scripts/kaldi.py @@ -0,0 +1,53 @@ +"""kaldi.py + +Utility script to create the tokens, time frames and alignments of the Kaldi +view in the example. + +""" + +from utils import print_annotation + + +TOKENS = "Hello, this is Jim Lehrer with the NewsHour on PBS. In the nineteen eighties, barking dogs have increasingly become a problem in urban areas.".split() + + +# Calculating time offsets from text offsets +FIRST_TEXT_OFFSET = 0 +LAST_TEXT_OFFSET = 141 +FIRST_TIME_OFFSET = 5500 +LAST_TIME_OFFSET = 22000 +STEP = int((LAST_TIME_OFFSET - FIRST_TIME_OFFSET) / LAST_TEXT_OFFSET) + + +def gather_annotations(): + offset = 0 + token_annotations = [] + for token in TOKENS: + if token[-1] in ',.': + token_annotations.append((offset, offset + len(token) - 1, token[:-1])) + token_annotations.append((offset + len(token) - 1, offset + len(token), token[-1])) + else: + token_annotations.append((offset, offset + len(token), token)) + offset += len(token) + 1 + return token_annotations + + +if __name__ == '__main__': + + count = 0 + for p1, p2, token in gather_annotations(): + count += 1 + token_id = 't%s' % count + frame_id = 'tf%s' % count + align_id = 'a%s' % (count + 1) + frame_p1 = FIRST_TIME_OFFSET + p1 * STEP + frame_p2 = FIRST_TIME_OFFSET + p2 * STEP + print_annotation( + "http://vocab.lappsgrid.org/Token", + [('id', token_id), ('start', p1), ('end', p2), ('text', token)]) + print_annotation( + "http://mmif.clams.ai/0.2.0/vocabulary/TimeFrame", + [('id', frame_id), ('start', frame_p1), ('end', frame_p2)]) + print_annotation( + "http://mmif.clams.ai/0.2.0/vocabulary/Alignment", + [('id', align_id), ('source', frame_id), ('target', token_id)]) diff --git a/docs/1.0.1/samples/everything/scripts/ner.py b/docs/1.0.1/samples/everything/scripts/ner.py new file mode 100644 index 00000000..fc2bc807 --- /dev/null +++ b/docs/1.0.1/samples/everything/scripts/ner.py @@ -0,0 +1,58 @@ +"""ner.py + +Utility script to create the named entities of the NER view in the example. 
+ +Tracking entities back to times (done manually): + +Jim Lehrer v4 t5 td1:15-18 --> tf4 7255-7606 + v4 t6 td1:19-25 --> tf5 7723-8425 + +PBS v4 t11 td1:47-50 --> tf11 10999-11350 + +New York v7 ne10 v6:td25:7-16 + v6 td25 text: "Dog in New York" --> v5:bb25 + v5 bb25 timePoint: 21000 +""" + +from utils import print_annotation + + +# Entities from the text documents, again some repetition. +entities = [ + ('1982-05-12', 'Date', 'v6:td2'), + ('Jim Lehrer', 'Person', 'v6:td6'), + ('Sara Just', 'Person', 'v6:td8'), + ('1982-05-12', 'Date', 'v6:td10'), + ('Jim Lehrer', 'Person', 'v6:td14'), + ('Sara Just', 'Person', 'v6:td16'), + ('1982-05-12', 'Date', 'v6:td18'), + ('Jim Lehrer', 'Person', 'v6:td22'), + ('Sara Just', 'Person', 'v6:td24'), + ('New York', 'Location', 'v6:td25', 7, 15), + ('Jim Lehrer', 'Person', 'v4:td1', 15, 25), + ('PBS', 'Organization', 'v4:td1', 47, 50) ] + + +if __name__ == '__main__': + + count = 0 + for entity in entities: + count += 1 + ner_id = 'ne%s' % count + text = entity[0] + cat = entity[1] + document = entity[2] + if len(entity) == 5: + start = entity[3] + end = entity[4] + else: + start = 0 + end = len(text) + print_annotation( + "http://vocab.lappsgrid.org/NamedEntity", + [('id', ner_id), + ('document', document), + ('start', start), + ('end', end), + ('category', cat), + ('text', text)]) diff --git a/docs/1.0.1/samples/everything/scripts/pbcore.py b/docs/1.0.1/samples/everything/scripts/pbcore.py new file mode 100644 index 00000000..3d7638f3 --- /dev/null +++ b/docs/1.0.1/samples/everything/scripts/pbcore.py @@ -0,0 +1,132 @@ +"""pbcore.py + +Script to experiment with exporting information from a MMIF file into PBCore. + +See ../pbcore.md for a description. + +""" + +import sys +import json + + +CONTRIBUTOR_TYPES = ('Host', 'Producer') +TITLE_TYPE = 'Title' +DATE_TYPE = 'Date' + +TAG_TYPE = 'http://vocab.lappsgrid.org/SemanticTag' + +ENTITY_TYPE = 'http://vocab.lappsgrid.org/NamedEntity' +ENTITY_CATEGORY = 'Person' + + +class MMIF(object): + + """Simplistic MMIF class, will be deprecated when the MMIF SDK is stable.""" + + def __init__(self, fname): + self.json = json.load(open(infile)) + self.metadata = self.json['metadata'] + self.documents = self.json['documents'] + self.views = [View(self, view) for view in self.json['views']] + + def get_view(self, view_id): + for view in self.views: + if view.id == view_id: + return view + + +class View(object): + + def __init__(self, mmif, json_obj): + self.mmif = mmif + self.id = json_obj['id'] + self.metadata = json_obj['metadata'] + self.annotations = [Annotation(self, anno) for anno in json_obj['annotations']] + + def __str__(self): + return "" % (self.id, self.metadata['app']) + + def get_document(self, annotation): + return ( + annotation.get_property('document') + or self.metatdata['contains'][annotation.type]['document']) + + def get_entities(self): + entities = {} + for anno in self.annotations: + if anno.type == ENTITY_TYPE: + entity = anno.get_property('text') + cat = anno.get_property('category') + doc = self.get_document(anno) + p1 = anno.get_property('start') + p2 = anno.get_property('end') + entities.setdefault(cat, {}) + entities[cat].setdefault(entity, []).append((entity, doc, p1, p2, anno, anno)) + return entities + + def get_persons(self): + persons = [] + for anno in self.annotations: + if (anno.type == ENTITY_TYPE + and anno.get_property('category') == ENTITY_CATEGORY): + persons.append(anno) + return persons + + def get_contributors(self): + """Pull all contributors from the slate parser view.""" 
+ contributors = {} + for anno in self.annotations: + tagname = anno.get_property('tagName') + if anno.type == TAG_TYPE and tagname in CONTRIBUTOR_TYPES: + contributors.setdefault(tagname, set()).add(anno.get_property('text')) + return contributors + + +class Annotation(object): + + def __init__(self, view, json_obj): + self.view = view + self.type = json_obj['@type'] + self.id = json_obj['properties']['id'] + self.properties = json_obj['properties'] + + def get_property(self, prop): + return self.properties.get(prop) + + +def print_entities(entities): + for cat in entities: + for entity in entities[cat]: + print("%-16s%-15s" % (cat, entity), end='') + for spec in entities[cat][entity]: + anchor = "%s-%s-%s" % (spec[1], spec[2], spec[3]) + print(anchor, end=' ') + print() + + +if __name__ == '__main__': + + infile = sys.argv[1] + mmif = MMIF(infile) + + bt_view = mmif.get_view("v1") + ner_view = mmif.get_view("v7") + tags_view = mmif.get_view("v8") + + print(bt_view) + print(ner_view) + print(tags_view) + + entities = ner_view.get_entities() + persons = ner_view.get_persons() + #locations = ner_view.get_locations() + + contributors = tags_view.get_contributors() + #date = tags_view.get_date() + #title = tags_view.get_date() + + print_entities(entities) + + print(persons) + print(contributors) diff --git a/docs/1.0.1/samples/everything/scripts/slates.py b/docs/1.0.1/samples/everything/scripts/slates.py new file mode 100644 index 00000000..77c2230c --- /dev/null +++ b/docs/1.0.1/samples/everything/scripts/slates.py @@ -0,0 +1,45 @@ +"""slates.py + +Utility script to create the semantic tags of the slate parser view in the +example. + +""" + +from utils import print_annotation + + +# Tags from the text documents for the slates, again some repetition. Very +# similar to the named entities, but with the title added and non-slate entities +# removed. +tags = [ + ('1982-05-12', 'Date', 'v6:td2'), + ('Loud Dogs', 'Title', 'v6:td4'), + ('Jim Lehrer', 'Host', 'v6:td6'), + ('Sara Just', 'Producer', 'v6:td8'), + ('1982-05-12', 'Date', 'v6:td10'), + ('Loud Dogs', 'Title', 'v6:td12'), + ('Jim Lehrer', 'Host', 'v6:td14'), + ('Sara Just', 'Producer', 'v6:td16'), + ('1982-05-12', 'Date', 'v6:td18'), + ('Loud Dogs', 'Title', 'v6:td20'), + ('Jim Lehrer', 'Host', 'v6:td22'), + ('Sara Just', 'Producer', 'v6:td24') ] + + +if __name__ == '__main__': + + count = 0 + for tag in tags: + count += 1 + tag_id = 'st%s' % count + text, cat, document = tag + start = 0 + end = len(text) + print_annotation( + "http://vocab.lappsgrid.org/SemanticTag", + [('id', tag_id), + ('document', document), + ('start', start), + ('end', end), + ('tagName', cat), + ('text', text)]) diff --git a/docs/1.0.1/samples/everything/scripts/tesseract.py b/docs/1.0.1/samples/everything/scripts/tesseract.py new file mode 100644 index 00000000..57f98530 --- /dev/null +++ b/docs/1.0.1/samples/everything/scripts/tesseract.py @@ -0,0 +1,36 @@ +"""tesseract.py + +Utility script to create the text documents and alignments of the EAST view in +the example. + +""" + +from utils import print_annotation + + +# These are lined up in order of the bounding boxes from EAST. 
Notice the +# repetition reflecting that identical bounding boxes form three time points +text_values = [ + 'DATE', '1982-05-12', 'TITLE', 'Loud Dogs', 'HOST', 'Jim Lehrer', 'PROD', 'Sara Just', + 'DATE', '1982-05-12', 'TITLE', 'Loud Dogs', 'HOST', 'Jim Lehrer', 'PROD', 'Sara Just', + 'DATE', '1982-05-12', 'TITLE', 'Loud Dogs', 'HOST', 'Jim Lehrer', 'PROD', 'Sara Just', + 'Dog in New York' ] + + +if __name__ == '__main__': + + count = 0 + for text in text_values: + count += 1 + box_id = 'v5:bb%s' % count + text_id = 'td%s' % count + align_id = 'a%s' % count + print_annotation( + "http://mmif.clams.ai/0.2.0/vocabulary/TextDocument", + [('id', text_id), + ('text-@value', text)]) + print_annotation( + "http://mmif.clams.ai/0.2.0/vocabulary/Alignment", + [('id', align_id), + ('source', box_id), + ('target', text_id)]) diff --git a/docs/1.0.1/samples/everything/scripts/utils.py b/docs/1.0.1/samples/everything/scripts/utils.py new file mode 100644 index 00000000..ee203644 --- /dev/null +++ b/docs/1.0.1/samples/everything/scripts/utils.py @@ -0,0 +1,20 @@ + +def print_annotation(attype, properties): + print(" {") + print(' "@type": "%s",' % attype) + print(' "properties": {') + for prop, value in properties[:-1]: + print_property(prop, value) + for prop, value in properties[-1:]: + print_property(prop, value, last=True) + print(" },") + + +def print_property(prop, value, last=False): + eol = ' }' if last else ',' + if type(value) in (int, list): + print(' "%s": %s%s' % (prop, value, eol)) + elif prop == 'text-@value': + print(' "text": { "@value": "%s" }%s' % (value, eol)) + else: + print(' "%s": "%s"%s' % (prop, value, eol)) diff --git a/docs/1.0.1/samples/segmenter-kaldi-ner/index.md b/docs/1.0.1/samples/segmenter-kaldi-ner/index.md new file mode 100644 index 00000000..723f4bc0 --- /dev/null +++ b/docs/1.0.1/samples/segmenter-kaldi-ner/index.md @@ -0,0 +1,182 @@ +--- +layout: page +title: MMIF Specification +subtitle: Version 1.0.1 +--- + +# Example: Segmenter, Kaldi and NER + +This example contains one audio document and three views: one created by the audio segmenter, one created by Kaldi and one created by a named entity recognizer. + +We now give fragments of the three views, each with some comments. + +To see the full example scroll down to the end or open the [raw json file](raw.json). + +### Fragment 1: the Segmenter view + +Metadata: + +```json +{ + "app": "http://mmif.clams.ai/apps/audio-segmenter/0.2.1", + "contains": { + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "timeUnit": "milliseconds", + "document": "m1" } +} +``` + +All time frames in the view are anchored to document "m1" and milliseconds are used for the unit. + +Partial annotations list: + +```json +[ + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf1", + "frameType": "speech", + "start": 17, + "end": 132 } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "frameType": "non-speech", + "id": "tf2", + "start": 132, + "end": 194 } + } +] +``` + +Two of the three time frames are shown here: one for a speech segment and one for a non-speech segment. Only the speech frames are input to Kaldi. 
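
As a quick illustration of how a consumer could act on this view, the sketch below (plain JSON access over [raw.json](raw.json), with no claim about how a real Kaldi wrapper is implemented) selects just the speech frames:

```python
import json

TIME_FRAME = "http://mmif.clams.ai/vocabulary/TimeFrame/v2"

def speech_frames(view):
    # Keep only the TimeFrame annotations marked as speech; start/end are in the
    # unit declared in the view metadata (milliseconds here).
    return [a["properties"] for a in view["annotations"]
            if a["@type"] == TIME_FRAME and a["properties"].get("frameType") == "speech"]

with open("raw.json") as fh:
    mmif = json.load(fh)
segmenter_view = next(v for v in mmif["views"] if "audio-segmenter" in v["metadata"]["app"])
for frame in speech_frames(segmenter_view):
    print(frame["id"], frame["start"], frame["end"])   # -> tf1 17 132, tf3 194 342
```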

### Fragment 2: the Kaldi view

Metadata:

```json
{
  "app": "http://mmif.clams.ai/apps/kaldi/0.2.1",
  "contains": {
    "http://mmif.clams.ai/vocabulary/TextDocument/v1": {},
    "http://vocab.lappsgrid.org/Token": {},
    "http://mmif.clams.ai/vocabulary/TimeFrame/v2": {
      "timeUnit": "milliseconds",
      "document": "m1" },
    "http://mmif.clams.ai/vocabulary/Alignment/v1": {} }
}
```

Kaldi creates five kinds of annotations:

1. Text documents for each speech time frame.
2. Tokens for each text document.
3. Time frames that correspond to each token; these time frames are all anchored to document "m1".
4. Alignments from speech frames to text documents; the speech frames were created by the segmenter.
5. Alignments from time frames to tokens.

The annotations list has two documents, one shown here:

```json
{
  "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1",
  "properties": {
    "id": "td1",
    "text": {
      "@value": "Fido barks" } }
}
```

This document does not know its history, but Kaldi also creates an alignment that spells out what time frame the document is aligned to:

```json
{
  "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1",
  "properties": {
    "id": "a1",
    "source": "v1:tf1",
    "target": "td1" }
}
```

Each document is tokenized; here is one token from the document above:

```json
{
  "@type": "http://vocab.lappsgrid.org/Token",
  "properties": {
    "id": "t1",
    "document": "v2:td1",
    "start": 0,
    "end": 4,
    "text": "Fido" }
}
```

Note how the token uses the *document* property to specify what document this is an annotation of. This has to be specified for each token because the Kaldi view has more than one text document.

The token is associated with a time frame in document "m1":

```json
{
  "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2",
  "properties": {
    "id": "tf1",
    "start": 17,
    "end": 64 }
}
```

And the token and time frame are linked by an alignment:

```json
{
  "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1",
  "properties": {
    "id": "a2",
    "source": "tf1",
    "target": "t1" }
}
```

If Kaldi had run on the entire document (not just on the speech frames) then the result would be a bit different: there would be just one text document, and the view metadata could specify that document for all tokens, so the tokens would not need individual *document* properties.

### Fragment 3: the NER view

Metadata:

```json
{
  "app": "http://mmif.clams.ai/apps/stanford-ner/0.2.1",
  "contains": {
    "http://vocab.lappsgrid.org/NamedEntity": {} }
}
```

One of the two named entity annotations:

```json
{
  "@type": "http://vocab.lappsgrid.org/NamedEntity",
  "properties": {
    "id": "ne1",
    "document": "v2:td1",
    "start": 0,
    "end": 4,
    "category": "Person",
    "word": "Fido" }
}
```

Notice how the entity anchors to one of the documents created by Kaldi.
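
To make that anchoring concrete, here is a rough sketch (plain dictionary access over [raw.json](raw.json), not the MMIF SDK) that takes the entity above and follows the token and alignment annotations in the Kaldi view back to the time frame in which "Fido" is spoken; the helper functions are illustrative only:

```python
import json

def get_view(mmif, view_id):
    return next(v for v in mmif["views"] if v["id"] == view_id)

def time_frames_for_entity(mmif, entity):
    # Resolve the view-prefixed document reference, find the tokens of that document
    # that overlap the entity's character span, and follow the token alignments to
    # their time frames (alignment sources are view-local ids in this example).
    props = entity["properties"]
    view_id = props["document"].split(":")[0]
    view = get_view(mmif, view_id)
    by_id = {a["properties"]["id"]: a for a in view["annotations"]}
    tokens = [a for a in view["annotations"]
              if a["@type"].endswith("/Token")
              and a["properties"].get("document") == props["document"]
              and a["properties"]["start"] < props["end"]
              and a["properties"]["end"] > props["start"]]
    frames = []
    for token in tokens:
        for a in view["annotations"]:
            if "/Alignment/" in a["@type"] and a["properties"]["target"] == token["properties"]["id"]:
                frames.append(by_id[a["properties"]["source"]]["properties"])
    return frames

with open("raw.json") as fh:
    mmif = json.load(fh)
entity = get_view(mmif, "v3")["annotations"][0]    # ne1, the entity shown above
print(time_frames_for_entity(mmif, entity))        # -> [{'id': 'tf1', 'start': 17, 'end': 64}]
```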
+ + + +## Full MMIF File + +```json +{% include_relative raw.json %} +``` diff --git a/docs/1.0.1/samples/segmenter-kaldi-ner/raw.json b/docs/1.0.1/samples/segmenter-kaldi-ner/raw.json new file mode 100644 index 00000000..4ccf748d --- /dev/null +++ b/docs/1.0.1/samples/segmenter-kaldi-ner/raw.json @@ -0,0 +1,247 @@ +{ + "metadata": { + "mmif": "http://mmif.clams.ai/1.0.1" + }, + "documents": [ + { + "@type": "http://mmif.clams.ai/vocabulary/AudioDocument/v1", + "properties": { + "id": "m1", + "mime": "audio/mpeg", + "location": "file:///var/archive/audio-002.mp3" + } + } + ], + "views": [ + { + "id": "v1", + "metadata": { + "app": "http://mmif.clams.ai/apps/audio-segmenter/0.2.1", + "contains": { + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "timeUnit": "milliseconds", + "document": "m1" + } + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf1", + "frameType": "speech", + "start": 17, + "end": 132 + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "frameType": "non-speech", + "id": "tf2", + "start": 132, + "end": 194 + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf3", + "frameType": "speech", + "start": 194, + "end": 342 + } + } + ] + }, + { + "id": "v2", + "metadata": { + "app": "http://mmif.clams.ai/apps/kaldi/0.2.1", + "contains": { + "http://mmif.clams.ai/vocabulary/TextDocument/v1": {}, + "http://vocab.lappsgrid.org/Token": {}, + "http://mmif.clams.ai/vocabulary/TimeFrame/v2": { + "timeUnit": "milliseconds", + "document": "m1" + }, + "http://mmif.clams.ai/vocabulary/Alignment/v1": {} + } + }, + "annotations": [ + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td1", + "text": { + "@value": "Fido barks" + } + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a1", + "source": "v1:tf1", + "target": "td1" + } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t1", + "document": "v2:td1", + "start": 0, + "end": 4, + "text": "Fido" + } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t2", + "document": "v2:td1", + "start": 5, + "end": 10, + "text": "barks" + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf1", + "start": 17, + "end": 64 + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf2", + "start": 65, + "end": 132 + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a2", + "source": "tf1", + "target": "t1" + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a3", + "source": "tf2", + "target": "t2" + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TextDocument/v1", + "properties": { + "id": "td2", + "textSource": "v1:tf3", + "text": { + "@value": "Fluffy sleeps" + } + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a4", + "source": "v1:tf3", + "target": "td2" + } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t3", + "document": "v2:td2", + "start": 0, + "end": 6, + "text": "Fluffy" + } + }, + { + "@type": "http://vocab.lappsgrid.org/Token", + "properties": { + "id": "t4", + "document": "v2:td2", + "start": 7, + "end": 13, + "text": "sleeps" + } + }, + { + "@type": 
"http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf3", + "start": 194, + "end": 240 + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/TimeFrame/v2", + "properties": { + "id": "tf4", + "start": 241, + "end": 342 + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a5", + "source": "tf3", + "target": "t3" + } + }, + { + "@type": "http://mmif.clams.ai/vocabulary/Alignment/v1", + "properties": { + "id": "a5", + "source": "tf4", + "target": "t4" + } + } + ] + }, + { + "id": "v3", + "metadata": { + "app": "http://mmif.clams.ai/apps/stanford-ner/0.2.1", + "contains": { + "http://vocab.lappsgrid.org/NamedEntity": {} + } + }, + "annotations": [ + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne1", + "document": "v2:td1", + "start": 0, + "end": 4, + "category": "Person", + "word": "Fido" + } + }, + { + "@type": "http://vocab.lappsgrid.org/NamedEntity", + "properties": { + "id": "ne2", + "document": "v2:td2", + "start": 0, + "end": 6, + "category": "Person", + "word": "Fluffy" + } + } + ] + } + ] +} diff --git a/docs/1.0.1/schema/lif.json b/docs/1.0.1/schema/lif.json new file mode 100644 index 00000000..bf02f031 --- /dev/null +++ b/docs/1.0.1/schema/lif.json @@ -0,0 +1,116 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "LAPPS Interchange Format", + "description": "The JSON-LD objects exchanged by LAPPS web services.", + "type": "object", + "additionalProperties": false, + "properties": { + "@context": { + "oneOf": [ + { + "type": "object", + "additionalProperties": true + }, + { + "type": "string", + "format": "uri" + } + ] + }, + "@vocab": { + "type": "string", + "format": "uri" + }, + "text": { + "type": "object", + "properties": { + "@value": { + "type": "string" + }, + "@language": { + "type": "string" + } + }, + "required": [ + "@value" + ], + "additionalProperties": false + }, + "metadata": { + "$ref": "#/definitions/map" + }, + "views": { + "type": "array", + "items": { + "$ref": "#/definitions/view" + } + } + }, + "definitions": { + "map": { + "type": "object", + "additionalProperties": true + }, + "view": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "metadata": { + "$ref": "#/definitions/map" + }, + "annotations": { + "$ref": "#/definitions/annotations" + } + }, + "additionalProperties": false, + "required": [ + "id", + "annotations" + ] + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/definitions/annotation" + } + }, + "annotation": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "@type": { + "type": "string" + }, + "type": { + "type": "string" + }, + "label": { + "type": "string" + }, + "start": { + "type": "integer", + "minimum": -1 + }, + "end": { + "type": "integer", + "minimum": -1 + }, + "features": { + "$ref": "#/definitions/map" + }, + "metadata": { + "$ref": "#/definitions/map" + } + }, + "required": [ + "id", + "@type" + ], + "additionalProperties": false + } + } +} diff --git a/docs/1.0.1/schema/mmif.json b/docs/1.0.1/schema/mmif.json new file mode 100644 index 00000000..97119553 --- /dev/null +++ b/docs/1.0.1/schema/mmif.json @@ -0,0 +1,191 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "Multi-Media Interchange Format", + "description": "The JSON-LD objects exchanged by CLAMS services.", + "type": "object", + "additionalProperties": false, + "properties": { + "metadata": { + "$ref": "#/definitions/mmifMetadata" + }, + "documents": { + 
"type": "array", + "items": { + "$ref": "#/definitions/annotation" + }, + "minLength": 1 + }, + "views": { + "type": "array", + "items": { + "$ref": "#/definitions/view" + } + } + }, + "required": [ + "metadata", + "documents", + "views" + ], + "definitions": { + "strStrMap": { + "type": "object", + "patternProperties": { + ".+": { + "anyOf": [ + {"type": "string"}, + {"type": "array", "items": { "type": "string" }} + ] + } + } + }, + "mmifMetadata": { + "type": "object", + "properties": { + "mmif": { + "type": "string", + "format": "uri", + "minLength": 7 + } + }, + "required": [ + "mmif" + ] + }, + "viewMetadata": { + "type": "object", + "properties": { + "timestamp": { + "type": "string", + "format": "date-time" + }, + "app": { + "type": "string", + "format": "uri", + "minLength": 7 + }, + "contains": { + "type": "object", + "additionalProperties": false, + "patternProperties": { + "^https?:\/\/": { + "$ref": "#/definitions/strStrMap" + } + } + }, + "error": { + "type": "object", + "properties": { + "message": { + "type": "string", + "minLength": 1 + }, + "stackTrace": { + "type": "string" + } + }, + "required": ["message"] + }, + "warnings": { + "type": "array", + "items": { + "type": "string" + }, + "minLength": 1 + }, + "parameters": { + "$ref": "#/definitions/strStrMap" + } + }, + "oneOf": [ + { + "required": [ + "app", + "contains" + ] + }, + { + "required": [ + "app", + "warnings" + ] + }, + { + "required": [ + "app", + "error" + ] + } + ] + }, + "text": { + "type": "object", + "properties": { + "@value": { + "type": "string" + }, + "@language": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "@value" + ] + }, + "view": { + "type": "object", + "properties": { + "id": { + "type": "string", + "minLength": 1 + }, + "metadata": { + "$ref": "#/definitions/viewMetadata" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/definitions/annotation" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "metadata", + "annotations" + ] + }, + "annotation": { + "type": "object", + "properties": { + "@type": { + "type": "string", + "minLength": 1 + }, + "properties": { + "$ref": "#/definitions/annotationProperties" + } + }, + "required": [ + "@type", + "properties" + ], + "additionalProperties": false + }, + "annotationProperties": { + "type": "object", + "properties": { + "id": { + "type": "string", + "minLength": 1 + } + }, + "required": [ + "id" + ] + } + } +} + diff --git a/docs/1.0.1/vocabulary/attypeversions.json b/docs/1.0.1/vocabulary/attypeversions.json new file mode 100644 index 00000000..5005eaae --- /dev/null +++ b/docs/1.0.1/vocabulary/attypeversions.json @@ -0,0 +1 @@ +{"Thing": "v1", "Annotation": "v2", "Region": "v1", "TimePoint": "v1", "Interval": "v1", "Span": "v1", "TimeFrame": "v2", "Chapter": "v2", "Polygon": "v1", "BoundingBox": "v1", "VideoObject": "v1", "Relation": "v1", "Document": "v1", "VideoDocument": "v1", "AudioDocument": "v1", "ImageDocument": "v1", "TextDocument": "v1", "Alignment": "v1"} \ No newline at end of file diff --git a/docs/1.0.1/vocabulary/css/lappsstyle.css b/docs/1.0.1/vocabulary/css/lappsstyle.css new file mode 100644 index 00000000..582de8f8 --- /dev/null +++ b/docs/1.0.1/vocabulary/css/lappsstyle.css @@ -0,0 +1,503 @@ +.fixed { + width: 15%; +} + +.col1 { + width: 25%; +} +td { + vertical-align: top; +} + +.definition { + padding-right: 10px; +} + +ul.tree { list-style:none; } +.hidden { display: none; } + +.property { + font-size: 90%; + font-style: italic; + color: #105010; +} 
+.index { + margin-top: 1em; + text-align: left; + font-size: 110%; + margin-bottom: 1em; +} +.deprecated { text-decoration: line-through; } + + +/* -- page structure -- */ +#container +{ + width: 100%; + text-align: left; + margin: 0; + background: #fff; +} +#intro +{ + position: relative; +} +#mainContent +{ + /* margin: 0 50px 15px 50px; */ + padding-bottom: 5px; + border-bottom: solid 1px #CCCCCC; + text-align: left; + font-size: small; +} + +#mainContent, #footer { + /* max-width: 960px; + min-width: 350px; */ + margin: 0 auto; +} +#footer +{ + text-align: right; + font-size: x-small; +} + +/* -- general -- */ +body +{ + color: #3A4956; + font-size: 75%; + font-family: "Lucida Grande" , "Lucida Sans Unicode" , Verdana, Tahoma, Arial, sans-serif; + line-height: 160%; + margin: 0; + padding: 0; +} +code +{ + font-family: Courier, monospace; +} +p.head, h1 +{ + font: bold 24px Helvetica, Arial, sans-serif; + color: #336699; + letter-spacing: -1px; + margin: 1em 0 0 0; +} +h2 +{ + /* border-top: 1px solid #336699; */ + padding-top: 5px; + clear: both; + color: #336699; + font: normal 18px Helvetica, Arial, sans-serif; + margin: 1em 0 0 0; +} +h3 +{ + font-size: 12px; + color: #660000; + margin: 1em 0 0 0; + position: relative; + top: 8px; +} +hr +{ + border: none; + height: 1px; + background: #ccc; + margin: 2em 0 4em 0; +} +p +{ + margin: 1em 0 0 0; +} +pre +{ + font-family: Courier, monospace; + font-size: 120%; + background: #E1E1E1; + width: auto; + padding: 5px 5px 5px 10px; + margin: 1em 0 0 0; + text-align: left; + overflow: auto; +} + +/* -- header/title -- */ +#pageHeader +{ + width: 100%; + height: 80px; + background: #336699; + position: top; +} +#pageHeader h1 +{ + color: #fff; + margin: 0 0 5px 40px; + /* font: bold Helvetica, Arial, sans-serif; */ + font-weight: bold; + font-family: Helvetica, Arial, sans-serif; + letter-spacing: -1px; +} +#pageHeader h2 { + color: #fff; + margin-left: 40px; + text-shadow: 0 2px 0 #510000; +} + +#pageHeader a:link, #pageHeader a:hover, #pageHeader a:visited +{ + color: #fff; + background-color: #336699; + text-decoration: none; +} + + +/* -- nav bar -- */ +#selectionbar +{ + color: #fff; + height: 46px; + background: #660000; + font-size: 90%; +} +#selectionbar ul +{ + margin: 0; + padding: 1em 1em 0 0; +} +#selectionbar li +{ + display: inline; + list-style: none; +} +#selectionbar a:link, #selectionbar a:visited +{ + color: #fff; + display: block; + float: right; + padding: 1px 9px 3px 6px; + margin: 0 6px; + text-decoration: none; +} +#selectionbar a:hover +{ + color: #FFEE99; + background-color: transparent; +} +#selectionbar .activelink a +{ + background: #336699; +} +#selectionbar .activelink a:hover +{ + color: #fff; + background-color: #336699; + cursor: default; +} + + +/* -- main content -- */ +#mainContent +{ + font-size: 100%; +} +#mainContent ul li +{ + list-style: inherit; + padding: 0 0 0 5px; + margin: 0; +} +#mainContent a:link +{ + color: #660000; + text-decoration: none; + border-bottom: dotted 1px #660000; +} +#mainContent a:visited +{ + color: #336699; + text-decoration: none; + border-bottom: dotted 1px #336699; +} +#mainContent a:hover +{ + border-bottom: none; + color: #fff; + background-color: #660000; + text-decoration: none; +} +#mainContent blockquote +{ + padding: 0 0 0 15px; + margin: 10px 0 10px 15px; + width: auto; + float: right; + border-left: thin dotted #000; +} + + +/* -- faq -- */ +.faq p, .faq pre, .faq ul, .faq table, .faq +{ + margin: .5em 0 0 50px; + padding-top: 0px; + padding-bottom: 2px; + color: 
#3A4956; +} + +.faq h1 +{ + margin-bottom: 1em; +} +.faq ul, .faq ol +{ + padding-left: 30px; + margin-left: 50px; +} +#mainContent .question +{ + font-weight: bold; + margin: 1.5em 0 0 0; + padding-top: 0px; + padding-bottom: 2px; +} + +/* -- types -- */ +table.definition-table +{ + margin: 1em 0 0 0; + border: 1px solid #98A0A6; + width: 100%; + border-collapse: collapse; +} +.definition-table th +{ + text-align: left; + background: #C7CBCE; + padding-left: 5px; +} +.definition-table td +{ + padding: 0 5px 2px 5px; + margin: 0; + vertical-align: top; + border: 1px solid #98A0A6; + border-collapse: collapse; +} +.definition-table td p +{ + padding: 0 0 .6em 0; + margin: 0; +} +.definition-table td ul +{ + padding-top: 0; + margin-top: 0; +} +.definition-table tr.alt +{ + background: #E9EAEB; +} +div.attrib +{ + padding-bottom: 1em; +} + +/* -- hierarchy -- */ +table.h, .h tr, .h td +{ + border: none; + margin: 0; + padding: 0; + border-collapse: collapse +} +.h .space +{ + width: 20px +} +.h .bar +{ + background-color: #000; + width: 1px +} +.h .tc +{ + text-indent: -21px; + padding-left: 21px +} + +/* -- links --*/ +a.path:link {color:silver;text-decoration:none;} +a.path:visited {color:silver;text-decoration:none;} +a.path:hover {color:gray;text-decoration:none;} + + +a.two:link {color:#333333;text-decoration:none;} +a.two:visited {color:#333333;text-decoration:none;} +a.two:hover {color:gray;text-decoration:none;} + +a.inherited-property:link {color:#336699;text-decoration:none;} +a.inherited-property:visited {color:#336699;text-decoration:none;} +a.inherited-property:hover {color:gray;text-decoration:none;} + +a.sameas:link {color:#336699;text-decoration:none;} +a.sameas:visited {color:#336699;text-decoration:none;} +a.sameas:hover {color:gray;text-decoration:none;} + +#arial1 { + font-family: Arial, Helvetica, sans-serif; + color: #333333; + font-size: small; +} + +tbody { + font-size: small; +} + +pre { + font-size: small; + background-color: #dddddd; + border-radius: 5px; +} + +#element +{ + font-size: x-large; + color: #336699; + font-weight: bold; +} + +#properties { + color: #336699; +} + +#prop-table { + width: 1126px; + height: 27px; +} + +#col1 { + width: 188px; +} + +#col2 { + width: 160px; +} + +#col3 { + width: 500px; +} + +#col4 { + width: 278px; +} + +#headrow { + background-color: #cccccc; + font-weight: bold; +} + + + +/* -- other -- */ +.backtotop, .faq .backtotop +{ + float: right; + clear: both; + padding: 3em 0 0 4em; + padding: 0; + font-size: 90%; +} +.date, .faq .date +{ + color: #BFC3C7; + text-align: right; + font-size: x-small; + clear: both; + padding-top: 4em; +} +.required +{ + text-align: right; + float: right; +} +.version +{ + color: #BFC3C7; + text-align: right; + font-size: x-small; + clear: both; + padding-top: 1em; +} + +#selectionbar ul +{ + float: right; + padding: 10px 0; +} +#mainContent, #footer, .wrapper { + /* max-width: 960px; */ + min-width: 350px; + margin: 0 auto !important; + padding: 0 70px; +} +#pageHeader h1 { + position:relative; + top: 25px; left: -40px; + text-shadow: 0 2px 0 #510000; + pointer-events: none; + padding: 0 40px; +} +#selectionbar ul { + margin: 0 auto; + display: block; +} +#cse-search-form { + float: right; + margin-top:-20px; + width: 248px !important; +} +.gsc-input input.gsc-input { background: #FFF !important;} + +@media all and (max-width: 720px) { + #pageHeader { + height: 120px; + } + #cse-search-form { + margin-left: 15px; + margin-top: 20px; + } +} + + +/* -- extras -- */ + +input.gsc-input { + 
border-color: #660000; + color: #333333; + font-family: "Lucida Grande" , "Lucida Sans Unicode" , Verdana, Tahoma, Arial, sans-serif; + font-size: 11px; + padding: 3px; + width: 99%; +} +input.gsc-search-button { + background-color: #660000; + border-color: #660000; + color: #fff; + font-family: inherit; + font-size: 11px; + font-weight: normal; + padding: 2px 8px; + text-shadow: none; +} +.gsc-input input.gsc-input { + background: none repeat scroll 0% 0% white !important; + border-color: #660000; + padding: 3px; + width: 99%; +} +.gsc-clear-button { + display: none; +} diff --git a/docs/1.0.1/vocabulary/index.html b/docs/1.0.1/vocabulary/index.html new file mode 100644 index 00000000..452dcfee --- /dev/null +++ b/docs/1.0.1/vocabulary/index.html @@ -0,0 +1,430 @@ + + + + + CLAMS Vocabulary + + + +
+
+ +
+
+

+ + The CLAMS Vocabulary defines an ontology of terms for a core of objects and features exchanged amongst tools that process multi-media data. It is based on the LAPPS Web Service Exchange Vocabulary at + + + http://vocab.lappsgrid.org. + + + The vocabulary is being developed bottom-up on an as-needed basis for use in the development of the CLAMS platform. + + + In the hierarchy below annotation types are printed with the properties defined for them, metadata properties are printed between square brackets. + +

+
+ + + + + + + + + + +
+ + Thing (v1) + + + : id + +
+ + + + + + + + + + + + + +
+ + Annotation (v2) + + + : [document] + + + : document + +
+ + + + + + + + + + + + + +
+ + Region (v1) + + + : [timeUnit] + +
+ + + + + + + + + + + + + +
+ + TimePoint (v1) + + + : timePoint + +
+ + + +
+ + + + + + + + + + +
+ + Interval (v1) + + + : start, end, targets + +
+ + + + + + + + + + + + + +
+ + Span (v1) + +
+ + + +
+ + + + + + + + + + +
+ + TimeFrame (v2) + + + : frameType + +
+ + + + + + + + + + + + + +
+ + Chapter (v2) + + + : title + +
+ + + +
+
+
+ + + + + + + + + + +
+ + Polygon (v1) + + + : coordinates, timePoint + +
+ + + + + + + + + + + + + +
+ + BoundingBox (v1) + + + : boxType + +
+ + + +
+
+ + + + + + + + + + +
+ + VideoObject (v1) + + + : polygons + +
+ + + +
+
+ + + + + + + + + + +
+ + Relation (v1) + +
+ + + +
+
+ + + + + + + + + + +
+ + Document (v1) + + + : location, mime + +
+ + + + + + + + + + + + + +
+ + VideoDocument (v1) + +
+ + + +
+ + + + + + + + + + +
+ + AudioDocument (v1) + +
+ + + +
+ + + + + + + + + + +
+ + ImageDocument (v1) + +
+ + + +
+ + + + + + + + + + +
+ + TextDocument (v1) + + + : text + +
+ + + +
+
+ + + + + + + + + + +
+ + Alignment (v1) + + + : [sourceType, targetType] + + + : source, target + +
+ + + +
+
+
+
+
+ + + diff --git a/docs/_config.yml b/docs/_config.yml index 4115a9e0..7e0687ab 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -21,6 +21,7 @@ navbar-links: CLAMS: "https://clams.ai" # a new version will be added here by build.py VERSIONS: + - 1.0.1: '1.0.1' - 1.0.0: '1.0.0' - 0.5.0: '0.5.0' - 0.4.2: '0.4.2' diff --git a/docs/vocabulary/Alignment/v1/index.html b/docs/vocabulary/Alignment/v1/index.html index db82e056..e30c518c 100644 --- a/docs/vocabulary/Alignment/v1/index.html +++ b/docs/vocabulary/Alignment/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -40,9 +44,6 @@

Alignment -

- from 0.5.0 (last updated) -


@@ -215,7 +216,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/Annotation/v2/index.html b/docs/vocabulary/Annotation/v2/index.html index 8ddf4994..b4ce542a 100644 --- a/docs/vocabulary/Annotation/v2/index.html +++ b/docs/vocabulary/Annotation/v2/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -40,9 +44,6 @@

Annotation -

- from 0.5.0 (last updated) -


@@ -167,7 +168,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/AudioDocument/v1/index.html b/docs/vocabulary/AudioDocument/v1/index.html index 6e836198..6669d0d2 100644 --- a/docs/vocabulary/AudioDocument/v1/index.html +++ b/docs/vocabulary/AudioDocument/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -46,9 +50,6 @@

AudioDocument -

- from 0.5.0 (last updated) -


@@ -183,7 +184,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/BoundingBox/v1/index.html b/docs/vocabulary/BoundingBox/v1/index.html index 20a6c281..cf15687b 100644 --- a/docs/vocabulary/BoundingBox/v1/index.html +++ b/docs/vocabulary/BoundingBox/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -58,9 +62,6 @@

BoundingBox -

- from 0.5.0 (last updated) -


@@ -303,7 +304,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/Chapter/v2/index.html b/docs/vocabulary/Chapter/v2/index.html new file mode 100644 index 00000000..e6646467 --- /dev/null +++ b/docs/vocabulary/Chapter/v2/index.html @@ -0,0 +1,313 @@ + + + + + Chapter + + + +
+
+ +
+
+
+

+ included in: + + 1.0.1 + +

+
+

+ + Thing + + + > + + + Annotation + + + > + + + Region + + + > + + + Interval + + + > + + + TimeFrame + + + > + + + Chapter + +

+
+

+ + + + + + + + +
+ + Definition + + + Example case for when we do not want to use Segment with a specific segmentType or if we want to introduce special properties. +
+ + URI + + + + http://mmif.clams.ai/vocabulary/Chapter/v2 + +
+

+ Metadata +

+

+ Metadata from Region +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ timeUnit + + String + + Specifies which unit of time the measurement is based. Can be *seconds* or *milliseconds*, or in case of annotations on a VideoDocument, *frames*. +
+

+ Metadata from Annotation +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ document + + ID + + The identifier of the document that the annotation is over. This has to be defined either at the metadata level, in which case it has scope over all annotations of the same type in a view, or at the instance level, in which it has scope over just the single annotation. +
+

+ Properties +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ title + + String + + Title of the chapter +
+

+ Properties from TimeFrame +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ frameType + + String + + The type of TimeFrame. Possible values include, but are not limited to, bars, tones, bars-and-tones, speech, noise, music, slate, chyron, lower-third, credits, and other. +
+

+ Properties from Interval +

+ + + + + + + + + + + + + + + + + + + + + +
+ Property + + Type + + Description +
+ start + + Integer + + The starting offset in the primary data. This point is inclusive. For time intervals, the unit is determined by the *timeUnit* metadata key. For text intervals, the unit is Unicode code point. +
+ end + + Integer + + The ending offset in the primary data. This point is exclusive. For time intervals, the unit is determined by the *timeUnit* metadata key. For text intervals, the unit is Unicode code point. +
+ targets + + List of IDs + + IDs of a sequence of annotations covering the region of primary data referred to by this annotation. Used as an alternative to *start* and *end* to point to component annotations (for example a token sequence) rather than directly into primary data, or to link two or more annotations (for example in a coreference annotation). +
+

+ Properties from Annotation +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ document + + ID + + The identifier of the document that the annotation is over. +
+

+ Properties from Thing +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ id + + ID + + A unique identifier for the annotation or document. Uniqueness is relative to the view the annotation is in or the list of documents at the top level of a MMIF file. + + [Required] + +
+
+ + + + + diff --git a/docs/vocabulary/Document/v1/index.html b/docs/vocabulary/Document/v1/index.html index dfc90fdb..a58cac8a 100644 --- a/docs/vocabulary/Document/v1/index.html +++ b/docs/vocabulary/Document/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -40,9 +44,6 @@

Document -

- from 0.5.0 (last updated) -


@@ -174,7 +175,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/ImageDocument/v1/index.html b/docs/vocabulary/ImageDocument/v1/index.html index 03a91941..7e9d2700 100644 --- a/docs/vocabulary/ImageDocument/v1/index.html +++ b/docs/vocabulary/ImageDocument/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -46,9 +50,6 @@

ImageDocument -

- from 0.5.0 (last updated) -


@@ -183,7 +184,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/Interval/v1/index.html b/docs/vocabulary/Interval/v1/index.html index add84a38..c98259e7 100644 --- a/docs/vocabulary/Interval/v1/index.html +++ b/docs/vocabulary/Interval/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -52,9 +56,6 @@

Interval -

- from 0.5.0 (last updated) -


@@ -278,7 +279,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/Polygon/v1/index.html b/docs/vocabulary/Polygon/v1/index.html index af44ad41..8d53baa5 100644 --- a/docs/vocabulary/Polygon/v1/index.html +++ b/docs/vocabulary/Polygon/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -52,9 +56,6 @@

Polygon -

- from 0.5.0 (last updated) -


@@ -270,7 +271,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/Region/v1/index.html b/docs/vocabulary/Region/v1/index.html index be262593..3de63dc1 100644 --- a/docs/vocabulary/Region/v1/index.html +++ b/docs/vocabulary/Region/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -46,9 +50,6 @@

Region -

- from 0.5.0 (last updated) -


@@ -223,7 +224,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/Relation/v1/index.html b/docs/vocabulary/Relation/v1/index.html index 765c2516..2610b0d9 100644 --- a/docs/vocabulary/Relation/v1/index.html +++ b/docs/vocabulary/Relation/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -46,9 +50,6 @@

Relation -

- from 0.5.0 (last updated) -


@@ -199,7 +200,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/Span/v1/index.html b/docs/vocabulary/Span/v1/index.html index 08ed0dfa..cadf3f54 100644 --- a/docs/vocabulary/Span/v1/index.html +++ b/docs/vocabulary/Span/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -58,9 +62,6 @@

Span -

- from 0.5.0 (last updated) -


@@ -116,6 +117,16 @@

+

+ + +
+ Similar to + + + http://vocab.lappsgrid.org/Region + +

Metadata @@ -287,7 +298,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/TextDocument/v1/index.html b/docs/vocabulary/TextDocument/v1/index.html index 4340d40a..710cca5a 100644 --- a/docs/vocabulary/TextDocument/v1/index.html +++ b/docs/vocabulary/TextDocument/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -46,9 +50,6 @@

TextDocument -

- from 0.5.0 (last updated) -


@@ -207,7 +208,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/Thing/v1/index.html b/docs/vocabulary/Thing/v1/index.html index ccbd02ef..63a6e89e 100644 --- a/docs/vocabulary/Thing/v1/index.html +++ b/docs/vocabulary/Thing/v1/index.html @@ -10,10 +10,10 @@
@@ -28,15 +28,16 @@

1.0.0 + , + + 1.0.1 +

Thing -

- from 0.5.0 (last updated) -


@@ -130,7 +131,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/TimeFrame/v2/index.html b/docs/vocabulary/TimeFrame/v2/index.html new file mode 100644 index 00000000..3cd4c965 --- /dev/null +++ b/docs/vocabulary/TimeFrame/v2/index.html @@ -0,0 +1,280 @@ + + + + + TimeFrame + + + +
+
+ +
+
+
+

+ included in: + + 1.0.1 + +

+
+

+ + Thing + + + > + + + Annotation + + + > + + + Region + + + > + + + Interval + + + > + + + TimeFrame + +

+
+

+ + + + + + + + +
+ + Definition + + + A temporal interval in an audio or video stream. This is similar to the term segment used in audio processing, but that term has a different meaning in the image and video community. +
+ + URI + + + + http://mmif.clams.ai/vocabulary/TimeFrame/v2 + +
+

+ Metadata +

+

+ Metadata from Region +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ timeUnit + + String + + Specifies which unit of time the measurement is based. Can be *seconds* or *milliseconds*, or in case of annotations on a VideoDocument, *frames*. +
+

+ Metadata from Annotation +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ document + + ID + + The identifier of the document that the annotation is over. This has to be defined either at the metadata level, in which case it has scope over all annotations of the same type in a view, or at the instance level, in which it has scope over just the single annotation. +
+

+ Properties +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ frameType + + String + + The type of TimeFrame. Possible values include, but are not limited to, bars, tones, bars-and-tones, speech, noise, music, slate, chyron, lower-third, credits, and other. +
+

+ Properties from Interval +

+ + + + + + + + + + + + + + + + + + + + + +
+ Property + + Type + + Description +
+ start + + Integer + + The starting offset in the primary data. This point is inclusive. For time intervals, the unit is determined by the *timeUnit* metadata key. For text intervals, the unit is Unicode code point. +
+ end + + Integer + + The ending offset in the primary data. This point is exclusive. For time intervals, the unit is determined by the *timeUnit* metadata key. For text intervals, the unit is Unicode code point. +
+ targets + + List of IDs + + IDs of a sequence of annotations covering the region of primary data referred to by this annotation. Used as an alternative to *start* and *end* to point to component annotations (for example a token sequence) rather than directly into primary data, or to link two or more annotations (for example in a coreference annotation). +
+

+ Properties from Annotation +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ document + + ID + + The identifier of the document that the annotation is over. +
+

+ Properties from Thing +

+ + + + + + + + + + + +
+ Property + + Type + + Description +
+ id + + ID + + A unique identifier for the annotation or document. Uniqueness is relative to the view the annotation is in or the list of documents at the top level of a MMIF file. + + [Required] + +
+
+ + + + + diff --git a/docs/vocabulary/TimePoint/v1/index.html b/docs/vocabulary/TimePoint/v1/index.html index 16d3b480..34aebb31 100644 --- a/docs/vocabulary/TimePoint/v1/index.html +++ b/docs/vocabulary/TimePoint/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -52,9 +56,6 @@

TimePoint -

- from 0.5.0 (last updated) -


@@ -259,7 +260,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/VideoDocument/v1/index.html b/docs/vocabulary/VideoDocument/v1/index.html index 496ff81b..79aaf257 100644 --- a/docs/vocabulary/VideoDocument/v1/index.html +++ b/docs/vocabulary/VideoDocument/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -46,9 +50,6 @@

VideoDocument -

- from 0.5.0 (last updated) -


@@ -183,7 +184,7 @@

- \ No newline at end of file + diff --git a/docs/vocabulary/VideoObject/v1/index.html b/docs/vocabulary/VideoObject/v1/index.html index af2cb80f..d1818079 100644 --- a/docs/vocabulary/VideoObject/v1/index.html +++ b/docs/vocabulary/VideoObject/v1/index.html @@ -10,10 +10,10 @@
@@ -28,6 +28,10 @@

1.0.0 + , + + 1.0.1 +

@@ -52,9 +56,6 @@

VideoObject -

- from 0.5.0 (last updated) -


@@ -259,7 +260,7 @@

- \ No newline at end of file + diff --git a/schema/mmif.json b/schema/mmif.json index 8d0b17e3..97119553 100644 --- a/schema/mmif.json +++ b/schema/mmif.json @@ -32,7 +32,10 @@ "type": "object", "patternProperties": { ".+": { - "type": "string" + "anyOf": [ + {"type": "string"}, + {"type": "array", "items": { "type": "string" }} + ] } } }, diff --git a/specifications/index.md b/specifications/index.md index 2c29bd4d..eba136bc 100644 --- a/specifications/index.md +++ b/specifications/index.md @@ -155,7 +155,7 @@ This property contains information about the annotations in a view. Here is an e "document": "m1" } }, - "parameters": {} + "parameters": {"threshold": "0.5", "not-defined-parameter": "some-value"}, } ``` @@ -163,7 +163,7 @@ The `timestamp` key stores when the view was created by the application. This is The `app` key contains an identifier that specifies what application created the view. The identifier must be a URL form, and HTTP webpage pointed by the URL should contain all app metadata information relevant for the application: description, configuration, input/output specifications and a more complete description of what output is created. The app identifier always includes a version number for the app. The metadata should also contain a link to the public code repository for the app (and that repository will actually maintain all the information in the URL). -The `parameters` is a dictionary of parameters and their values, if any, that were handed to the app at the runtime when it was called. +The `parameters` is a dictionary of runtime parameters and their *string* values, if any. The primary purpose of this dictionary is to record the parameters "as-is" for reproducibility and accountability. Note that CLAMS apps are developed to run as HTTP servers, expecting parameters to be passed as URL query strings. Hence, the values in the `parameters` dictionary are always strings or simple lists of strings. The `contains` dictionary has keys that refer to annotation objects in the CLAMS or LAPPS vocabulary, or user-defined objects. Namely, they indicate the kind of annotations that live in the view. The value of each of those keys is a JSON object which contains metadata specified for the annotation type. The example above has one key that indicates that the view contains *TimeFrame* annotations, and it gives two metadata values for that annotation type: diff --git a/vocabulary/clams.vocabulary.yaml b/vocabulary/clams.vocabulary.yaml index ff92bbeb..17e66991 100644 --- a/vocabulary/clams.vocabulary.yaml +++ b/vocabulary/clams.vocabulary.yaml @@ -117,7 +117,8 @@ properties: name: Span parent: Interval -similarTo: http://vocab.lappsgrid.org/Region +similarTo: + - http://vocab.lappsgrid.org/Region description: >- An annotation over a region in primary text data. A Span may be defined by @@ -134,10 +135,13 @@ description: >- segment used in audio processing, but that term has a different meaning in the image and video community. + properties: frameType: type: String - description: The type of TimeFrame. Could be bars-and-tones, speech, noise, music, other. + description: The type of TimeFrame. Possible values include, but are not + limited to, bars, tones, bars-and-tones, speech, noise, music, slate, + chyron, lower-third, credits, and other. ---