Merge pull request #120 from clamsproject/116-traing-with-challenging
accommodating image-level annotations as training/validation data
keighrim authored Nov 25, 2024
2 parents c0aa813 + 626171c commit 2f80099
Showing 38 changed files with 1,390 additions and 678 deletions.
5 changes: 1 addition & 4 deletions .gitignore
@@ -257,10 +257,7 @@ $RECYCLE.BIN/
 
 # Files created by data_ingestion or visualization
 modeling/features
-modeling/results*/*.pt
-modeling/results*/*.csv
-modeling/results*/*.txt
-modeling/results*/*.yml
+modeling/results*
 modeling/vectorized/*
 modeling/html
 
13 changes: 12 additions & 1 deletion app.py
@@ -79,8 +79,10 @@ def _annotate_timepoints(self, mmif: Mmif, **parameters) -> Mmif:
         all_positions = []
         t = time.perf_counter()
         # in the following, the .glob() should always return only one, otherwise we have a problem
+        ## naming convention from train.py + gridsearch.py = {timestamp}.{backbonename}.{prebinname}.pos{T/F}.pt
+        ## right now, `prebinname` is fixed to `nomap` as we don't use prebinning
         model_filestem = next(default_model_storage.glob(
-            f"*.{parameters['tpModelName']}.pos{'T' if parameters['tpUsePosModel'] else 'F'}.pt")).stem
+            f"*.{parameters['tpModelName']}.*.pos{'T' if parameters['tpUsePosModel'] else 'F'}.pt")).stem
         self.logger.info(f"Initiating classifier with {model_filestem}")
         classifier = classify.Classifier(default_model_storage / model_filestem,
                                          self.logger.name if self.logger.isEnabledFor(logging.DEBUG) else None)
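For illustration, a self-contained sketch of why the widened glob is needed: under the `{timestamp}.{backbonename}.{prebinname}.pos{T/F}.pt` convention noted in the comments, the old pattern had no wildcard for the `prebinname` segment. The file name below is fabricated.

```python
from pathlib import Path
import tempfile

# fabricate a model file following the naming convention (made-up timestamp)
storage = Path(tempfile.mkdtemp())
(storage / "20241125-101500.convnext_small.nomap.posT.pt").touch()

old_pattern = "*.convnext_small.posT.pt"      # no slot for `prebinname`
new_pattern = "*.convnext_small.*.posT.pt"    # `.*.` matches e.g. "nomap"
print(list(storage.glob(old_pattern)))        # [] -- no match
print(next(storage.glob(new_pattern)).stem)   # 20241125-101500.convnext_small.nomap.posT
```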
@@ -123,6 +125,8 @@ def _annotate_timepoints(self, mmif: Mmif, **parameters) -> Mmif:
             timepoint_annotation.add_property('classification', classification)
 
     def _annotate_timeframes(self, mmif: Mmif, **parameters) -> Mmif:
+        from modeling.config import bins
+
         TimeFrameTuple = namedtuple('TimeFrame',
                                     ['label', 'tf_score', 'targets', 'representatives'])
         tp_view = mmif.get_view_contains(AnnotationTypes.TimePoint)
@@ -152,6 +156,13 @@ def _annotate_timeframes(self, mmif: Mmif, **parameters) -> Mmif:
         src_labels = sqh.validate_labelset(tps)
 
         # TODO: fill in `tfLabelMap` parameter value if a preset is used by the user
+        # first fill in labelMap parameter value if a preset is used by the user
+        label_map = bins.binning_schemes.get(parameters['tfLabelMapPreset'])
+        if label_map is None:
+            label_map = parameters['tfLabelMap']
+        else:
+            label_map = {lbl: binname for binname, lbls in label_map.items() for lbl in lbls}
+        parameters['tfLabelMap'] = label_map
         self.logger.debug(f"Label map: {parameters['tfLabelMap']}")
         label_remapper = sqh.build_label_remapper(src_labels, parameters['tfLabelMap'])
 
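The preset-to-map inversion added above is easy to see with a toy example; the scheme below is made up, not necessarily one of the real presets in `modeling/config/bins.py`.

```python
# a made-up binning scheme in the {bin: [labels]} shape of
# modeling/config/bins.binning_schemes, inverted into the {label: bin} shape
# that `tfLabelMap` expects, exactly as the dict comprehension above does
binning_scheme = {
    'credits': ['C', 'R'],
    'chyron': ['I', 'N', 'Y'],
}
label_map = {lbl: binname for binname, lbls in binning_scheme.items() for lbl in lbls}
print(label_map)
# {'C': 'credits', 'R': 'credits', 'I': 'chyron', 'N': 'chyron', 'Y': 'chyron'}
```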
70 changes: 39 additions & 31 deletions metadata.py
@@ -10,6 +10,7 @@
 from mmif import DocumentTypes, AnnotationTypes
 
 from modeling import FRAME_TYPES
+import modeling.config.bins
 
 default_model_storage = Path(__file__).parent / 'modeling/models'
 
@@ -26,16 +27,6 @@ def appmetadata() -> AppMetadata:
 
     available_models = default_model_storage.glob('*.pt')
 
-    # This was the most frequent label mapping from the old configuration file,
-    # which had default mappings for each model.
-    labelMap = [
-        "B:bars",
-        "S:slate",
-        "I:chyron", "N:chyron", "Y:chyron",
-        "C:credits", "R:credits",
-        "W:other_opening", "L:other_opening", "O:other_opening", "M:other_opening",
-        "E:other_text", "K:other_text", "G:other_text", "T:other_text", "F:other_text"]
-
     metadata = AppMetadata(
         name="Scenes-with-text Detection",
         description="Detects scenes with text, like slates, chyrons and credits. "
@@ -57,36 +48,42 @@ def appmetadata() -> AppMetadata:
 
     metadata.add_parameter(
         name='useClassifier', type='boolean', default=True,
-        description='Use the image classifier model to generate TimePoint annotations')
+        description='Use the image classifier model to generate TimePoint annotations.')
     metadata.add_parameter(
         name='tpModelName', type='string',
-        default='convnext_lg',
+        default='convnext_small',
         choices=list(set(m.stem.split('.')[1] for m in available_models)),
-        description='model name to use for classification, only applies when `useClassifier=true`')
+        description='Model name to use for classification, only applies when `useClassifier=true`.')
     metadata.add_parameter(
         name='tpUsePosModel', type='boolean', default=True,
-        description='Use the model trained with positional features, only applies when `useClassifier=true`')
+        description='Use the model trained with positional features, only applies when `useClassifier=true`.')
     metadata.add_parameter(
         name='tpStartAt', type='integer', default=0,
-        description='Number of milliseconds into the video to start processing, only applies when `useClassifier=true`')
+        description='Number of milliseconds into the video to start processing, only applies when `useClassifier=true`.')
     metadata.add_parameter(
         name='tpStopAt', type='integer', default=sys.maxsize,
-        description='Number of milliseconds into the video to stop processing, only applies when `useClassifier=true`')
+        description='Number of milliseconds into the video to stop processing, only applies when `useClassifier=true`.')
     metadata.add_parameter(
         name='tpSampleRate', type='integer', default=1000,
-        description='Milliseconds between sampled frames, only applies when `useClassifier=true`')
+        description='Milliseconds between sampled frames, only applies when `useClassifier=true`.')
     metadata.add_parameter(
         name='useStitcher', type='boolean', default=True,
-        description='Use the stitcher after classifying the TimePoints')
+        description='Use the stitcher after classifying the TimePoints.')
     metadata.add_parameter(
         name='tfMinTPScore', type='number', default=0.5,
-        description='Minimum score for a TimePoint to be included in a TimeFrame, only applies when `useStitcher=true`')
+        description='Minimum score for a TimePoint to be included in a TimeFrame. '
+                    'A lower value will include more TimePoints in the TimeFrame '
+                    '(increasing recall in exchange for precision). '
+                    'Only applies when `useStitcher=true`.')
     metadata.add_parameter(
         name='tfMinTFScore', type='number', default=0.9,
-        description='Minimum score for a TimeFrame, only applies when `useStitcher=true`')
+        description='Minimum score for a TimeFrame. '
+                    'A lower value will include more TimeFrames in the output '
+                    '(increasing recall in exchange for precision). '
+                    'Only applies when `useStitcher=true`')
     metadata.add_parameter(
         name='tfMinTFDuration', type='integer', default=5000,
-        description='Minimum duration of a TimeFrame in milliseconds, only applies when `useStitcher=true`')
+        description='Minimum duration of a TimeFrame in milliseconds, only applies when `useStitcher=true`.')
     metadata.add_parameter(
         name='tfAllowOverlap', type='boolean', default=False,
         description='Allow overlapping time frames, only applies when `useStitcher=true`')
@@ -96,17 +93,28 @@ def appmetadata() -> AppMetadata:
                     'multiple representative points to follow any changes in the scene. '
                     'Only applies when `useStitcher=true`')
     metadata.add_parameter(
-        # TODO: do we want to use the old default labelMap from the configuration here or
-        # do we truly want an empty mapping and use the pass-through, as hinted at in the
-        # description (which is now not in sync with the code).
-        name='tfLabelMap', type='map', default=labelMap,
+        name='tfLabelMap', type='map', default=[],
         description=(
-            'Mapping of a label in the input annotations to a new label. Must be formatted as '
-            'IN_LABEL:OUT_LABEL (with a colon). To pass multiple mappings, use this parameter '
-            'multiple times. By default, all the input labels are passed as is, including any '
-            'negative labels (with default value being no remapping at all). However, when '
-            'at least one label is remapped, all the other "unset" labels are discarded as '
-            'a negative label. Only applies when `useStitcher=true`'))
+            '(See also `tfLabelMapPreset`, set `tfLabelMapPreset=nopreset` to make sure that a preset does not '
+            'override `tfLabelMap` when using this) Mapping of a label in the input TimePoint annotations to a new '
+            'label of the stitched TimeFrame annotations. Must be formatted as IN_LABEL:OUT_LABEL (with a colon). To '
+            'pass multiple mappings, use this parameter multiple times. When two+ TP labels are mapped to a TF '
+            'label, it essentially works as a "binning" operation. If no mapping is used, all the input labels are '
+            'passed-through, meaning no change in both TP & TF labelsets. However, when at least one label is mapped, '
+            'all the other "unset" labels are mapped to the negative label (`-`) and if `-` does not exist in the TF '
+            'labelset, it is added automatically. '
+            'Only applies when `useStitcher=true`.'))
+    labelMapPresetsReformat = {schname: str([f'`{lbl}`:`{binname}`'
+                                             for binname, lbls in scheme.items()
+                                             for lbl in lbls])
+                               for schname, scheme in modeling.config.bins.binning_schemes.items()}
+    labelMapPresetsMarkdown = '\n'.join([f"- `{k}`: {v}" for k, v in labelMapPresetsReformat.items()])
+    metadata.add_parameter(
+        name='tfLabelMapPreset', type='string', default='relaxed',
+        choices=list(modeling.config.bins.binning_schemes.keys()),
+        description=f'(See also `tfLabelMap`) Preset alias of a label mapping. If not `nopreset`, this parameter will '
+                    f'override the `tfLabelMap` parameter. Available presets are:\n{labelMapPresetsMarkdown}\n\n '
+                    f'Only applies when `useStitcher=true`.')
 
     return metadata
 
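A sketch of the remapping semantics the new `tfLabelMap` description promises; this is not the actual `sqh.build_label_remapper` implementation, only an illustration of the pass-through and negative-label behavior.

```python
# NOT the repo's build_label_remapper -- just the documented semantics:
# with an empty map, labels pass through unchanged; once any label is
# mapped, every unmapped source label falls to the negative label '-'
def remap(src_labels, label_map):
    if not label_map:
        return {lbl: lbl for lbl in src_labels}
    return {lbl: label_map.get(lbl, '-') for lbl in src_labels}

src = ['B', 'S', 'I', 'C', 'R']
print(remap(src, {}))              # {'B': 'B', 'S': 'S', 'I': 'I', 'C': 'C', 'R': 'R'}
print(remap(src, {'S': 'slate'}))  # {'B': '-', 'S': 'slate', 'I': '-', 'C': '-', 'R': '-'}
```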
12 changes: 8 additions & 4 deletions modeling/__init__.py
@@ -2,10 +2,14 @@
 positive_label = '+'
 
 # full typology from https://github.com/clamsproject/app-swt-detection/issues/1
-FRAME_TYPES = ["B", "S", "W", "L", "O",
-               "M", "I", "N", "E", "P", "Y", "K", "G", "T", "F", "C", "R"]
-FRAME_TYPES_WITH_SUBTYPES = ["B", "SH", "SC", "SD", "SB", "SG", "W", "L", "O",
-                             "M", "I", "N", "E", "P", "Y", "K", "G", "T", "F", "C", "R"]
+FRAME_TYPES = [
+    "B", "S", "I", "C", "R", "M", "O", "W",
+    "N", "Y", "U", "K",
+    "L", "G", "F", "E", "T",
+    "P",
+]
+FRAME_TYPES_WITH_SUBTYPES = FRAME_TYPES.copy() + ['SH', 'SC', 'SD', 'SB', 'SG']
+FRAME_TYPES_WITH_SUBTYPES.remove('S')
 
 # These are time frames that are typically static (that is, the text does not
 # move around or change as with rolling credits). These are frame names after
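A quick sanity check of the rewritten labelset construction, using only values visible in the diff: `S` is dropped from the subtype variant and replaced by its five slate subtypes.

```python
FRAME_TYPES = [
    "B", "S", "I", "C", "R", "M", "O", "W",
    "N", "Y", "U", "K",
    "L", "G", "F", "E", "T",
    "P",
]
FRAME_TYPES_WITH_SUBTYPES = FRAME_TYPES.copy() + ['SH', 'SC', 'SD', 'SB', 'SG']
FRAME_TYPES_WITH_SUBTYPES.remove('S')

assert len(FRAME_TYPES) == 18
assert len(FRAME_TYPES_WITH_SUBTYPES) == 18 - 1 + 5   # 'S' out, 5 subtypes in
assert 'S' not in FRAME_TYPES_WITH_SUBTYPES
```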
20 changes: 10 additions & 10 deletions modeling/backbones.py
@@ -46,16 +46,6 @@ class ExtractorModel:
 # TODO/REVIEW - do we want to be able to change the weight versions (IMAGENET1K_V1 etc)
 # ==========================================|
 # ConvNext Models
-class ConvnextBaseExtractor(ExtractorModel):
-    name = "convnext_base"
-    dim = 1024
-
-    def __init__(self):
-        self.model = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1)
-        self.model.classifier[-1] = torch.nn.Identity()
-        self.preprocess = ConvNeXt_Base_Weights.IMAGENET1K_V1.transforms()
-
-
 class ConvnextTinyExtractor(ExtractorModel):
     name = "convnext_tiny"
     dim = 768
@@ -76,6 +66,16 @@ def __init__(self):
         self.preprocess = ConvNeXt_Small_Weights.IMAGENET1K_V1.transforms()
 
 
+class ConvnextBaseExtractor(ExtractorModel):
+    name = "convnext_base"
+    dim = 1024
+
+    def __init__(self):
+        self.model = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1)
+        self.model.classifier[-1] = torch.nn.Identity()
+        self.preprocess = ConvNeXt_Base_Weights.IMAGENET1K_V1.transforms()
+
+
 class ConvnextLargeExtractor(ExtractorModel):
     name = "convnext_lg"
     dim = 1536
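For context, a rough usage sketch for one of these extractor classes (not code from the repo); the torchvision calls mirror the class bodies above, and `torch.rand` stands in for a real video frame.

```python
import torch
from torchvision.models import convnext_base, ConvNeXt_Base_Weights

# load the backbone, replace the 1000-way ImageNet head with Identity so the
# forward pass yields pooled features instead of class logits
weights = ConvNeXt_Base_Weights.IMAGENET1K_V1
model = convnext_base(weights=weights)
model.classifier[-1] = torch.nn.Identity()
model.eval()
preprocess = weights.transforms()            # resize/crop/normalize preset

frame = torch.rand(3, 480, 640)              # stand-in for a sampled frame
with torch.no_grad():
    feats = model(preprocess(frame).unsqueeze(0))
print(feats.shape)                           # torch.Size([1, 1024]) == dim
```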
2 changes: 1 addition & 1 deletion modeling/classify.py
@@ -20,7 +20,7 @@ def __init__(self, model_stem, logger_name=None):
         model_config_file = f"{model_stem}.yml"
         model_checkpoint = f"{model_stem}.pt"
         model_config = yaml.safe_load(open(model_config_file))
-        self.training_labels = train.pretraining_binned_label(model_config)
+        self.training_labels = train.get_prebinned_labelset(model_config)
         self.featurizer = data_loader.FeatureExtractor(**model_config)
         label_count = len(FRAME_TYPES) + 1
         if 'bins' in model_config:
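A hypothetical sketch of a parsed model config: the code above only implies that `yaml.safe_load` returns a dict that may contain a `bins` key; the other key and all values below are invented for illustration.

```python
model_config = {
    'img_enc_name': 'convnext_small',   # invented key name
    'bins': {                           # {bin_name: [raw labels]} shape,
        'bars': ['B'],                  # matching the binning schemes
        'slate': ['S'],
        'chyron': ['I', 'N', 'Y'],
        'credits': ['C', 'R'],
    },
}
if 'bins' in model_config:
    # presumably the classifier sizes its output for the binned labelset
    # (plus the negative label) instead of the full FRAME_TYPES labelset
    label_count = len(model_config['bins']) + 1
print(label_count)  # 5
```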
Empty file added modeling/config/__init__.py
146 changes: 146 additions & 0 deletions modeling/config/batches.py
@@ -0,0 +1,146 @@
# training batches see https://github.com/clamsproject/aapb-annotations/tree/main/batches for more details
unintersting_guids = ["cpb-aacip-254-75r7szdz"] # the most "uninteresting" video (88/882 frames annotated)
aapb_collaboration_27_a = [
"cpb-aacip-129-88qc000k",
"cpb-aacip-f2c34dd1cd4",
"cpb-aacip-191-40ksn47s",
"cpb-aacip-507-028pc2tp2z",
"cpb-aacip-507-0k26970f2d",
"cpb-aacip-507-0z70v8b17g",
"cpb-aacip-512-542j67b12n",
"cpb-aacip-394-149p8fcw",
"cpb-aacip-08fb0e1f287",
"cpb-aacip-512-t43hx1753b",
"cpb-aacip-d0f2569e145",
"cpb-aacip-d8ebafee30e",
"cpb-aacip-c72fd5cbadc",
"cpb-aacip-b6a2a39b7eb",
"cpb-aacip-512-4b2x34nv4t",
"cpb-aacip-512-416sx65d21",
"cpb-aacip-512-3f4kk95f7h",
"cpb-aacip-512-348gf0nn4f",
"cpb-aacip-516-cc0tq5s94c",
"cpb-aacip-516-8c9r20sq57",
]
aapb_collaboration_27_b = [
"cpb-aacip-254-75r7szdz",
"cpb-aacip-259-4j09zf95",
"cpb-aacip-526-hd7np1xn78",
"cpb-aacip-75-72b8h82x",
"cpb-aacip-fe9efa663c6",
"cpb-aacip-f5847a01db5",
"cpb-aacip-f2a88c88d9d",
"cpb-aacip-ec590a6761d",
"cpb-aacip-c7c64922fcd",
"cpb-aacip-f3fa7215348",
"cpb-aacip-f13ae523e20",
"cpb-aacip-e7a25f07d35",
"cpb-aacip-ce6d5e4bd7f",
"cpb-aacip-690722078b2",
"cpb-aacip-e649135e6ec",
"cpb-aacip-15-93gxdjk6",
"cpb-aacip-512-4f1mg7h078",
"cpb-aacip-512-4m9183583s",
"cpb-aacip-512-4b2x34nt7g",
"cpb-aacip-512-3n20c4tr34",
"cpb-aacip-512-3f4kk9534t",
]
aapb_collaboration_27_c = [
"cpb-aacip-0d338c39a45",
"cpb-aacip-0acac5e9db7",
"cpb-aacip-0bdc7c8ecc5",
"cpb-aacip-1032b1787b4",
"cpb-aacip-516-qf8jd4qq96",
"cpb-aacip-259-kh0dzd78",
"cpb-aacip-259-nc5sb374",
"cpb-aacip-259-mw28cq94",
"cpb-aacip-259-mc8rg22j",
"cpb-aacip-259-5717pw8g",
"cpb-aacip-259-pr7msz5c",
"cpb-aacip-259-g737390m",
"cpb-aacip-259-pc2t780t",
"cpb-aacip-259-q814r90k",
"cpb-aacip-259-cz325478",
"cpb-aacip-259-vh5cgj9t",
"cpb-aacip-259-gt5ff704",
"cpb-aacip-259-gx44t714",
"cpb-aacip-259-pr7msz3w",
"cpb-aacip-259-zg6g5589",
]
aapb_collaboration_27_d = [
"cpb-aacip-259-wh2dcb8p"
] # this is kept for evaluation set, should not be used for training!!!

# new image-level annotation added after v6.1
# "challenging images" from later annotation (`bm` set and `pbd` set, 60 videos, 2024 summer)
# recorded as `aapb-collaboration-27-e` in the annotation repo
guids_with_challenging_images_bm = [
"cpb-aacip-00a9ed7f2ba",
"cpb-aacip-0ace30f582d",
"cpb-aacip-0ae98c2c4b2",
"cpb-aacip-0b0c0afdb11",
"cpb-aacip-0bb992d2e7f",
"cpb-aacip-0c0374c6c55",
"cpb-aacip-0c727d4cac3",
"cpb-aacip-0c74795718b",
"cpb-aacip-0cb2aebaeba",
"cpb-aacip-0d74af419eb",
"cpb-aacip-0dbb0610457",
"cpb-aacip-0dfbaaec869",
"cpb-aacip-0e2dc840bc6",
"cpb-aacip-0ed7e315160",
"cpb-aacip-0f3879e2f22",
"cpb-aacip-0f80359ada5",
"cpb-aacip-0f80a4f5ed2",
"cpb-aacip-0fe3e4311e1",
"cpb-aacip-1a365705273",
"cpb-aacip-1b295839145",
]
guids_with_challenging_images_pbd = [
"cpb-aacip-110-16c2ftdq",
"cpb-aacip-120-1615dwkg",
"cpb-aacip-120-203xsm67",
"cpb-aacip-15-70msck27",
"cpb-aacip-16-19s1rw84",
"cpb-aacip-17-07tmq941",
"cpb-aacip-17-58bg87rx",
"cpb-aacip-17-65v6xv27",
"cpb-aacip-17-81jhbz0g",
"cpb-aacip-29-61djhjcx",
"cpb-aacip-29-8380gksn",
"cpb-aacip-41-322bvxmn",
"cpb-aacip-41-42n5tj3d",
"cpb-aacip-110-35gb5r94",
"cpb-aacip-111-655dvd99",
"cpb-aacip-120-19s1rrsp",
"cpb-aacip-120-31qfv097",
"cpb-aacip-120-73pvmn2q",
"cpb-aacip-120-80ht7h8d",
"cpb-aacip-120-8279d01c",
"cpb-aacip-120-83xsjcb2",
"cpb-aacip-17-88qc0md1",
"cpb-aacip-35-36tx99h9",
"cpb-aacip-42-78tb31b1",
"cpb-aacip-52-84zgn1wb",
"cpb-aacip-52-87pnw5t0",
"cpb-aacip-55-84mkmvwx",
"cpb-aacip-75-13905w9q",
"cpb-aacip-75-54xgxnzg",
"cpb-aacip-77-02q5807j",
"cpb-aacip-77-074tnfhr",
"cpb-aacip-77-1937qsxt",
"cpb-aacip-77-214mx491",
"cpb-aacip-77-24jm6zc8",
"cpb-aacip-77-35t77b2v",
"cpb-aacip-77-44bp0mdh",
"cpb-aacip-77-49t1h3fv",
"cpb-aacip-77-81jhbv89",
"cpb-aacip-83-074tmx7h",
"cpb-aacip-83-23612txx",
]
aapb_collaboration_27_e = guids_with_challenging_images_bm + guids_with_challenging_images_pbd

# this `pbd` subset contains 40 videos with 15328 (non-transitional) + 557 (transitional) = 15885 frames
# then updated with more annotations 19331 (non-transitional) + 801 (transitional) = 20132 frames
# we decided to use this subset for the fixed validation set (#116)
guids_for_fixed_validation_set = guids_with_challenging_images_pbd
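Illustrative only (this is not in the module): one plausible way the lists above could be combined downstream, with the `pbd` subset held out as the fixed validation set and batch `27_d` reserved for evaluation; the actual split logic lives in the training code, not in this config module.

```python
# hypothetical split assembly, runnable at the bottom of this module
train_guids = (aapb_collaboration_27_a
               + aapb_collaboration_27_b
               + aapb_collaboration_27_c
               + guids_with_challenging_images_bm)
validation_guids = guids_for_fixed_validation_set    # the fixed `pbd` subset (#116)
# aapb_collaboration_27_d stays held out for evaluation, never for training
assert not set(train_guids) & set(validation_guids)
```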