178 changes: 89 additions & 89 deletions examples/README.md

Large diffs are not rendered by default.

53 changes: 52 additions & 1 deletion examples/annotation_import/audio.ipynb
@@ -170,7 +170,7 @@
},
{
"metadata": {},
"source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)",
"source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n # Temporal classification for token-level annotations\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"User Speaker\",\n scope=lb.Classification.Scope.INDEX, # INDEX scope for temporal\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)",
"cell_type": "code",
"outputs": [],
"execution_count": null
@@ -223,6 +223,27 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation],\n ))",
@@ -252,6 +273,29 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"## Temporal Audio Annotations\n",
"\n",
"You can create temporal annotations for individual tokens (words) with precise timing:\n"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": "# Define tokens with precise timing (from demo script)\ntokens_data = [\n (\"Hello\", 586, 770), # Hello: frames 586-770\n (\"AI\", 771, 955), # AI: frames 771-955\n (\"how\", 956, 1140), # how: frames 956-1140\n (\"are\", 1141, 1325), # are: frames 1141-1325\n (\"you\", 1326, 1510), # you: frames 1326-1510\n (\"doing\", 1511, 1695), # doing: frames 1511-1695\n (\"today\", 1696, 1880), # today: frames 1696-1880\n]\n\n# Create temporal annotations for each token\ntemporal_annotations = []\nfor token, start_frame, end_frame in tokens_data:\n token_annotation = lb_types.AudioClassificationAnnotation(\n frame=start_frame,\n end_frame=end_frame,\n name=\"User Speaker\",\n value=lb_types.Text(answer=token),\n )\n temporal_annotations.append(token_annotation)\n\nprint(f\"Created {len(temporal_annotations)} temporal token annotations\")",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": "# Create label with both regular and temporal annotations\nlabel_with_temporal = []\nlabel_with_temporal.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation] +\n temporal_annotations,\n ))\n\nprint(\n f\"Created label with {len(label_with_temporal[0].annotations)} total annotations\"\n)\nprint(f\" - Regular annotations: 3\")\nprint(f\" - Temporal annotations: {len(temporal_annotations)}\")",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
@@ -260,6 +304,13 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": "# Upload temporal annotations via MAL\ntemporal_upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"temporal_mal_job-{str(uuid.uuid4())}\",\n predictions=label_with_temporal,\n)\n\ntemporal_upload_job.wait_until_done()\nprint(\"Temporal upload completed!\")\nprint(\"Errors:\", temporal_upload_job.errors)\nprint(\"Status:\", temporal_upload_job.statuses)",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
libs/labelbox/src/labelbox/data/annotation_types/__init__.py
@@ -19,6 +19,9 @@
from .video import MaskInstance
from .video import VideoMaskAnnotation

from .audio import AudioClassificationAnnotation
from .audio import AudioObjectAnnotation

from .ner import ConversationEntity
from .ner import DocumentEntity
from .ner import DocumentTextSelection
63 changes: 63 additions & 0 deletions libs/labelbox/src/labelbox/data/annotation_types/audio.py
@@ -0,0 +1,63 @@
from typing import Optional

from labelbox.data.annotation_types.annotation import (
ClassificationAnnotation,
ObjectAnnotation,
)
from labelbox.data.mixins import (
ConfidenceNotSupportedMixin,
CustomMetricsNotSupportedMixin,
)


class AudioClassificationAnnotation(ClassificationAnnotation):
"""Audio classification for specific time range

Examples:
- Speaker identification from 2500ms to 4100ms
- Audio quality assessment for a segment
- Language detection for audio segments

Args:
name (Optional[str]): Name of the classification
feature_schema_id (Optional[Cuid]): Feature schema identifier
value (Union[Text, Checklist, Radio]): Classification value
frame (int): The frame index in milliseconds (e.g., 2500 = 2.5 seconds)
end_frame (Optional[int]): End frame in milliseconds (for time ranges)
segment_index (Optional[int]): Index of audio segment this annotation belongs to
extra (Dict[str, Any]): Additional metadata
"""

frame: int
end_frame: Optional[int] = None
segment_index: Optional[int] = None


class AudioObjectAnnotation(
ObjectAnnotation,
ConfidenceNotSupportedMixin,
CustomMetricsNotSupportedMixin,
):
"""Audio object annotation for specific time range

Examples:
- Transcription: "Hello world" from 2500ms to 4100ms
- Sound events: "Dog barking" from 10000ms to 12000ms
- Audio segments with metadata

Args:
name (Optional[str]): Name of the annotation
feature_schema_id (Optional[Cuid]): Feature schema identifier
value (Union[TextEntity, Geometry]): Localization or text content
frame (int): The frame index in milliseconds (e.g., 10000 = 10.0 seconds)
end_frame (Optional[int]): End frame in milliseconds (for time ranges)
keyframe (bool): Whether this is a keyframe annotation (default: True)
segment_index (Optional[int]): Index of audio segment this annotation belongs to
classifications (Optional[List[ClassificationAnnotation]]): Optional sub-classifications
extra (Dict[str, Any]): Additional metadata
"""

frame: int
end_frame: Optional[int] = None
keyframe: bool = True
segment_index: Optional[int] = None
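
For reference, a minimal construction sketch (not part of the diff) using the `lb_types` alias from the notebook above; the classification name "User Speaker" and the token timing come from the example cells, and the fields follow the class definitions in this file.

```python
import labelbox.types as lb_types

# "Hello" spoken from 586 ms to 770 ms, matching the token timing in the notebook
hello_token = lb_types.AudioClassificationAnnotation(
    name="User Speaker",                  # classification name from the ontology
    value=lb_types.Text(answer="Hello"),  # the transcribed token
    frame=586,                            # start of the range, in milliseconds
    end_frame=770,                        # optional end of the range, in milliseconds
)
```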
26 changes: 26 additions & 0 deletions libs/labelbox/src/labelbox/data/annotation_types/label.py
@@ -13,6 +13,7 @@
from .metrics import ScalarMetric, ConfusionMatrixMetric
from .video import VideoClassificationAnnotation
from .video import VideoObjectAnnotation, VideoMaskAnnotation
from .audio import AudioClassificationAnnotation, AudioObjectAnnotation
from .mmc import MessageEvaluationTaskAnnotation
from pydantic import BaseModel, field_validator

@@ -44,6 +45,8 @@ class Label(BaseModel):
ClassificationAnnotation,
ObjectAnnotation,
VideoMaskAnnotation,
AudioClassificationAnnotation,
AudioObjectAnnotation,
ScalarMetric,
ConfusionMatrixMetric,
RelationshipAnnotation,
Expand Down Expand Up @@ -85,6 +88,29 @@ def frame_annotations(
frame_dict[annotation.frame].append(annotation)
return frame_dict

def audio_annotations_by_frame(
self,
) -> Dict[
int, List[Union[AudioObjectAnnotation, AudioClassificationAnnotation]]
]:
"""Get audio annotations organized by frame (millisecond)

Returns:
Dict[int, List]: Dictionary mapping frame (milliseconds) to list of audio annotations

Example:
>>> label.audio_annotations_by_frame()
{2500: [AudioClassificationAnnotation(...)], 10000: [AudioObjectAnnotation(...)]}
"""
frame_dict = defaultdict(list)
for annotation in self.annotations:
if isinstance(
annotation,
(AudioObjectAnnotation, AudioClassificationAnnotation),
):
frame_dict[annotation.frame].append(annotation)
return dict(frame_dict)

def add_url_to_masks(self, signer) -> "Label":
"""
Creates signed urls for all masks in the Label.
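A small usage sketch for the new accessor, assuming `label` is one of the `Label` objects built in the notebook above:

```python
# Group the label's audio annotations by their start frame (milliseconds)
by_frame = label.audio_annotations_by_frame()

for frame_ms, annotations in sorted(by_frame.items()):
    names = [annotation.name for annotation in annotations]
    print(f"{frame_ms} ms: {names}")
```
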
libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py
@@ -12,6 +12,7 @@

from ...annotation_types.annotation import ClassificationAnnotation
from ...annotation_types.video import VideoClassificationAnnotation
from ...annotation_types.audio import AudioClassificationAnnotation
from ...annotation_types.llm_prompt_response.prompt import (
PromptClassificationAnnotation,
PromptText,
@@ -223,7 +224,7 @@ def from_common(
# ====== End of subclasses


class NDText(NDAnnotation, NDTextSubclass):
class NDText(NDAnnotation, NDTextSubclass, VideoSupported):
@classmethod
def from_common(
cls,
@@ -242,6 +243,7 @@ def from_common(
name=name,
schema_id=feature_schema_id,
uuid=uuid,
frames=extra.get("frames"),
message_id=message_id,
confidence=text.confidence,
custom_metrics=text.custom_metrics,
@@ -399,7 +401,11 @@ class NDClassification:
@staticmethod
def to_common(
annotation: "NDClassificationType",
) -> Union[ClassificationAnnotation, VideoClassificationAnnotation]:
) -> Union[
ClassificationAnnotation,
VideoClassificationAnnotation,
AudioClassificationAnnotation,
]:
common = ClassificationAnnotation(
value=annotation.to_common(),
name=annotation.name,
@@ -414,18 +420,35 @@ def to_common(
results = []
for frame in annotation.frames:
for idx in range(frame.start, frame.end + 1, 1):
results.append(
VideoClassificationAnnotation(
frame=idx, **common.model_dump(exclude_none=True)
# Check if this is an audio annotation by looking at the extra data
# Audio annotations will have frame/end_frame in extra, video annotations won't
if (
hasattr(annotation, "extra")
and annotation.extra
and "frames" in annotation.extra
):
# This is likely an audio temporal annotation
results.append(
AudioClassificationAnnotation(
frame=idx, **common.model_dump(exclude_none=True)
)
)
else:
# This is a video temporal annotation
results.append(
VideoClassificationAnnotation(
frame=idx, **common.model_dump(exclude_none=True)
)
Bug: Temporal Annotation Classification Fails

The NDClassification.to_common method uses a fragile heuristic to distinguish between audio and video temporal annotations. It checks for "frames" in annotation.extra, but both annotation types can contain frame data there. This unreliable check can lead to incorrect classification and downstream processing errors.


)
)
return results

@classmethod
def from_common(
cls,
annotation: Union[
ClassificationAnnotation, VideoClassificationAnnotation
ClassificationAnnotation,
VideoClassificationAnnotation,
AudioClassificationAnnotation,
],
data: GenericDataRowData,
) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]:
@@ -448,7 +471,9 @@ def from_common(
@staticmethod
def lookup_classification(
annotation: Union[
ClassificationAnnotation, VideoClassificationAnnotation
ClassificationAnnotation,
VideoClassificationAnnotation,
AudioClassificationAnnotation,
],
) -> Union[NDText, NDChecklist, NDRadio]:
return {Text: NDText, Checklist: NDChecklist, Radio: NDRadio}.get(
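
The review comment above argues that keying off `"frames" in annotation.extra` cannot reliably separate audio from video temporal annotations. One hypothetical alternative is to have the caller state the media type explicitly instead of guessing from `extra`; the function name, the `media_type` parameter, and the string values below are illustrations only, not part of this PR or of the labelbox API.

```python
from typing import Dict, List, Union

from labelbox.data.annotation_types import (
    AudioClassificationAnnotation,
    VideoClassificationAnnotation,
)


def expand_frames_explicitly(
    frames,               # objects with .start and .end, as in to_common above
    common_fields: Dict,  # shared fields dumped from the common ClassificationAnnotation
    media_type: str,      # hypothetical: supplied by the caller, e.g. "AUDIO" or "VIDEO"
) -> List[Union[AudioClassificationAnnotation, VideoClassificationAnnotation]]:
    annotation_cls = (
        AudioClassificationAnnotation
        if media_type == "AUDIO"
        else VideoClassificationAnnotation
    )
    # Expand each (start, end) range into one annotation per frame index
    return [
        annotation_cls(frame=idx, **common_fields)
        for frame in frames
        for idx in range(frame.start, frame.end + 1)
    ]
```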