|
48 | 48 | NDVideoMasks,
|
49 | 49 | )
|
50 | 50 | from .relationship import NDRelationship
|
51 |
| -from .utils.temporal_processor import AudioTemporalProcessor |
52 | 51 |
|
53 | 52 | AnnotationType = Union[
|
54 | 53 | NDObjectType,
|
@@ -87,6 +86,46 @@ def _get_consecutive_frames(
|
87 | 86 | consecutive.append((group[0], group[-1]))
|
88 | 87 | return consecutive
|
89 | 88 |
|
| 89 | + @classmethod |
| 90 | + def _get_audio_frame_ranges(cls, annotation_group: List[Union[AudioClassificationAnnotation, AudioObjectAnnotation]]) -> List[Tuple[int, int]]: |
| 91 | + """Get frame ranges for audio annotations (simpler than video segments)""" |
| 92 | + return [(ann.frame, getattr(ann, 'end_frame', None) or ann.frame) for ann in annotation_group] |
| 93 | + |
| 94 | + @classmethod |
| 95 | + def _has_changing_values(cls, annotation_group: List[AudioClassificationAnnotation]) -> bool: |
| 96 | + """Check if annotations have different values (multi-value per instance)""" |
| 97 | + if len(annotation_group) <= 1: |
| 98 | + return False |
| 99 | + first_value = annotation_group[0].value.answer |
| 100 | + return any(ann.value.answer != first_value for ann in annotation_group) |
| 101 | + |
@classmethod
def _create_multi_value_annotation(
    cls,
    annotation_group: "List[AudioClassificationAnnotation]",
    data,
):
    """Yield one NDJSON classification encoding a group of audio
    annotations whose answer changes over time.

    The per-frame answers are serialized as a JSON ``frame_mapping``
    (start frame -> answer) stored as a Text value, and the covered
    ranges are attached via ``extra["frames"]``.

    Args:
        annotation_group: Non-empty group of audio classification
            annotations for one feature, carrying differing answers.
        data: The label's data object, forwarded to
            ``NDClassification.from_common``.

    Yields:
        The NDJSON classification built from the combined annotations.
    """
    # Local imports mirror the surrounding code's style (presumably to
    # avoid import cycles at module load time).
    import copy
    import json

    # Build frame ranges and the start-frame -> answer mapping in one pass.
    frames_data = []
    frame_mapping = {}
    for ann in annotation_group:
        end = getattr(ann, "end_frame", None)
        # Explicit None check: `or` would wrongly discard a falsy
        # (zero) end_frame.
        start, end = ann.frame, ann.frame if end is None else end
        frames_data.append({"start": start, "end": end})
        # NOTE(review): keyed by start frame only — two annotations
        # sharing a start would overwrite each other; confirm starts
        # are unique within a group.
        frame_mapping[str(start)] = ann.value.answer

    content = json.dumps({"frame_mapping": frame_mapping})

    # Work on a copy so the caller's first annotation is not mutated
    # in place (the original overwrote its value/extra directly).
    template = copy.deepcopy(annotation_group[0])
    from ...annotation_types.classification.classification import Text

    template.value = Text(answer=content)
    template.extra = {"frames": frames_data}

    yield NDClassification.from_common(template, data)
90 | 129 | @classmethod
|
91 | 130 | def _get_segment_frame_ranges(
|
92 | 131 | cls,
|
@@ -170,20 +209,35 @@ def _create_video_annotations(
|
@classmethod
def _create_audio_annotations(
    cls, label: Label
) -> Generator[Union[NDChecklistSubclass, NDRadioSubclass], None, None]:
    """Yield NDJSON audio annotations for *label*, with multi-value support.

    Audio annotations are grouped by feature (``feature_schema_id`` when
    present, otherwise ``name``). Classification groups whose answer
    changes over time are emitted as a single frame-mapped annotation;
    groups with a constant answer are emitted once with all covered
    frame ranges attached. Object annotations are emitted individually.

    Args:
        label: Label whose annotations are scanned for audio types.

    Yields:
        NDClassification / NDObject entries in NDJSON format.
    """
    audio_annotations = defaultdict(list)

    # Group audio annotations by feature identity.
    for annot in label.annotations:
        if isinstance(
            annot, (AudioClassificationAnnotation, AudioObjectAnnotation)
        ):
            audio_annotations[annot.feature_schema_id or annot.name].append(
                annot
            )

    for annotation_group in audio_annotations.values():
        first = annotation_group[0]

        # NOTE(review): a group is typed by its first member; a mixed
        # group (classification + object under one name) would drop the
        # minority type — confirm groups are homogeneous upstream.
        if isinstance(first, AudioClassificationAnnotation):
            if cls._has_changing_values(annotation_group):
                # Answers differ across time: emit one frame-mapped
                # annotation for the whole group.
                yield from cls._create_multi_value_annotation(
                    annotation_group, label.data
                )
            else:
                # Constant answer: emit the first annotation carrying
                # every covered frame range. (Ranges are only needed on
                # this branch, so compute them here.)
                frame_ranges = cls._get_audio_frame_ranges(annotation_group)
                frames_data = [
                    {"start": start, "end": end}
                    for start, end in frame_ranges
                ]
                if first.extra is None:
                    # Defensive: avoid AttributeError when extra was
                    # never populated.
                    first.extra = {}
                first.extra.update({"frames": frames_data})
                yield NDClassification.from_common(first, label.data)
        elif isinstance(first, AudioObjectAnnotation):
            # Audio objects need no segment merging — one entry each.
            for annotation in annotation_group:
                yield NDObject.from_common(annotation, label.data)
|
188 | 242 | @classmethod
|
189 | 243 | def _create_non_video_annotations(cls, label: Label):
|
|
0 commit comments