From c05c590cc66c0c5c1902d8c085b657080fe0d9a4 Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Tue, 17 Sep 2024 14:48:09 -0700 Subject: [PATCH] Specify insert fields and unique indices for Rekognition tags insert --- .../data_augmentation/rekognition/add_rekognition_labels.py | 2 ++ .../migrated-cccatalog-archives/image_analysis_labels.jsonl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/catalog/dags/data_augmentation/rekognition/add_rekognition_labels.py b/catalog/dags/data_augmentation/rekognition/add_rekognition_labels.py index 3a7149aba5a..15b454c818d 100644 --- a/catalog/dags/data_augmentation/rekognition/add_rekognition_labels.py +++ b/catalog/dags/data_augmentation/rekognition/add_rekognition_labels.py @@ -60,6 +60,8 @@ def _insert_tags(tags_buffer: types.TagsBuffer, postgres_conn_id: str): tags_buffer, executemany=True, replace=True, + target_fields=["identifier", "tags"], + replace_index="identifier", ) diff --git a/catalog/tests/s3-data/migrated-cccatalog-archives/image_analysis_labels.jsonl b/catalog/tests/s3-data/migrated-cccatalog-archives/image_analysis_labels.jsonl index ef609c92f6b..6921588ef40 100644 --- a/catalog/tests/s3-data/migrated-cccatalog-archives/image_analysis_labels.jsonl +++ b/catalog/tests/s3-data/migrated-cccatalog-archives/image_analysis_labels.jsonl @@ -196,5 +196,5 @@ {"image_uuid":"1bde95a5-2cbd-4d2b-a4ce-223c248d3ce9","response":{"Labels":[{"Name":"Bronze","Confidence":93.5422744751,"Instances":[],"Parents":[]},{"Name":"Wood","Confidence":87.76222229,"Instances":[],"Parents":[]},{"Name":"Figurine","Confidence":61.1347007751,"Instances":[],"Parents":[]},{"Name":"Fossil","Confidence":58.6593818665,"Instances":[],"Parents":[]}],"LabelModelVersion":"2.0","ResponseMetadata":{"RequestId":"b4d1db94-ad41-436d-9347-158a49846bc3","HTTPStatusCode":200,"HTTPHeaders":{"content-type":"application\/x-amz-json-1.1","date":"Thu, 29 Oct 2020 19:56:58 GMT","x-amzn-requestid":"b4d1db94-ad41-436d-9347-158a49846bc3","content-length":"347","connection":"keep-alive"},"RetryAttempts":0}}} {"image_uuid":"bad46ce2-7f30-4215-a665-c2a02bad3457","response":{"Labels":[{"Name":"Diagram","Confidence":97.5646286011,"Instances":[],"Parents":[]},{"Name":"Map","Confidence":97.5646286011,"Instances":[],"Parents":[{"Name":"Diagram"}]},{"Name":"Poster","Confidence":97.125038147,"Instances":[],"Parents":[{"Name":"Advertisement"}]},{"Name":"Flyer","Confidence":97.125038147,"Instances":[{"BoundingBox":{"Width":0.8941606283,"Height":0.8436123133,"Left":0.0526748672,"Top":0.0817656517},"Confidence":97.125038147}],"Parents":[{"Name":"Poster"},{"Name":"Paper"},{"Name":"Advertisement"}]},{"Name":"Advertisement","Confidence":97.125038147,"Instances":[],"Parents":[]},{"Name":"Paper","Confidence":97.125038147,"Instances":[],"Parents":[]},{"Name":"Brochure","Confidence":97.125038147,"Instances":[],"Parents":[{"Name":"Poster"},{"Name":"Paper"},{"Name":"Advertisement"}]},{"Name":"Plot","Confidence":95.9586868286,"Instances":[],"Parents":[]},{"Name":"Atlas","Confidence":94.5617141724,"Instances":[],"Parents":[{"Name":"Plot"},{"Name":"Map"},{"Name":"Diagram"}]},{"Name":"Nature","Confidence":56.4241638184,"Instances":[],"Parents":[]}],"LabelModelVersion":"2.0","ResponseMetadata":{"RequestId":"c547f53e-d877-4700-9094-78cce6137579","HTTPStatusCode":200,"HTTPHeaders":{"content-type":"application\/x-amz-json-1.1","date":"Thu, 29 Oct 2020 19:56:58 GMT","x-amzn-requestid":"c547f53e-d877-4700-9094-78cce6137579","content-length":"1176","connection":"keep-alive"},"RetryAttempts":0}}} {"image_uuid":"24181325-2188-4c2f-9e95-51fbf6144a7a","response":{"Labels":[{"Name":"Hair Slide","Confidence":86.4182739258,"Instances":[],"Parents":[]},{"Name":"Bronze","Confidence":71.7022628784,"Instances":[],"Parents":[]}],"LabelModelVersion":"2.0","ResponseMetadata":{"RequestId":"e4352a69-42ed-4e94-9684-edda57d40ebd","HTTPStatusCode":200,"HTTPHeaders":{"content-type":"application\/x-amz-json-1.1","date":"Thu, 29 Oct 2020 19:56:59 GMT","x-amzn-requestid":"e4352a69-42ed-4e94-9684-edda57d40ebd","content-length":"196","connection":"keep-alive"},"RetryAttempts":0}}} -{"image_uuid":"e86b82d7-eea4-44dc-8e0a-4d22f5f68ef3","response":{"Labels":[{"Name":"Plant","Confidence":97.7835769653,"Instances":[],"Parents":[]},{"Name":"Vase","Confidence":96.293296814,"Instances":[],"Parents":[{"Name":"Jar"},{"Name":"Pottery"}]},{"Name":"Pottery","Confidence":96.293296814,"Instances":[],"Parents":[]},{"Name":"Jar","Confidence":96.293296814,"Instances":[],"Parents":[]},{"Name":"Ikebana","Confidence":95.8563232422,"Instances":[],"Parents":[{"Name":"Art"},{"Name":"Vase"},{"Name":"Ornament"},{"Name":"Flower Arrangement"},{"Name":"Jar"},{"Name":"Pottery"},{"Name":"Flower"},{"Name":"Plant"}]},{"Name":"Flower","Confidence":95.8563232422,"Instances":[],"Parents":[{"Name":"Plant"}]},{"Name":"Art","Confidence":95.8563232422,"Instances":[],"Parents":[]},{"Name":"Ornament","Confidence":95.8563232422,"Instances":[],"Parents":[]},{"Name":"Blossom","Confidence":95.8563232422,"Instances":[],"Parents":[{"Name":"Plant"}]},{"Name":"Flower Arrangement","Confidence":95.8563232422,"Instances":[],"Parents":[{"Name":"Flower"},{"Name":"Plant"}]},{"Name":"Grass","Confidence":69.0845184326,"Instances":[],"Parents":[{"Name":"Plant"}]},{"Name":"Envelope","Confidence":66.0714263916,"Instances":[],"Parents":[]},{"Name":"Mail","Confidence":66.0714263916,"Instances":[],"Parents":[{"Name":"Envelope"}]},{"Name":"Text","Confidence":62.6300048828,"Instances":[],"Parents":[]},{"Name":"Page","Confidence":62.1074295044,"Instances":[],"Parents":[{"Name":"Text"}]},{"Name":"Paper","Confidence":57.3788146973,"Instances":[],"Parents":[]},{"Name":"Advertisement","Confidence":57.3788146973,"Instances":[],"Parents":[]},{"Name":"Flyer","Confidence":57.3788146973,"Instances":[{"BoundingBox":{"Width":0.9246006012,"Height":0.9213131666,"Left":0.0441937037,"Top":0.0583134666},"Confidence":57.3788146973}],"Parents":[{"Name":"Poster"},{"Name":"Paper"},{"Name":"Advertisement"}]},{"Name":"Poster","Confidence":57.3788146973,"Instances":[],"Parents":[{"Name":"Advertisement"}]},{"Name":"Brochure","Confidence":57.3788146973,"Instances":[],"Parents":[{"Name":"Poster"},{"Name":"Paper"},{"Name":"Advertisement"}]}],"LabelModelVersion":"2.0","ResponseMetadata":{"RequestId":"80f78525-c82f-47e9-b885-fee73c64017b","HTTPStatusCode":200,"HTTPHeaders":{"content-type":"application\/x-amz-json-1.1","date":"Fri, 30 Oct 2020 16:02:42 GMT","x-amzn-requestid":"80f78525-c82f-47e9-b885-fee73c64017b","content-length":"2184","connection":"keep-alive"},"RetryAttempts":0}}} +{"image_uuid":"b840de61-fb9d-4ec5-9572-8d778875869f","response":{"Labels":[{"Name":"Plant","Confidence":97.7835769653,"Instances":[],"Parents":[]}]},"LabelModelVersion":"2.0","ResponseMetadata":{"RequestId":"0e9cd4f1-055e-4d4f-af3b-9a1884268f68","HTTPStatusCode":200,"HTTPHeaders":{"content-type":"application\/x-amz-json-1.1","date":"Thu, 29 Oct 2020 19:46:03 GMT","x-amzn-requestid":"0e9cd4f1-055e-4d4f-af3b-9a1884268f68","content-length":"1109","connection":"keep-alive"},"RetryAttempts":0}} {"image_uuid":"ec9b23fe-53bf-4662-af32-2512cbbc6635","response":{"Labels":[],"LabelModelVersion":"2.0","ResponseMetadata":{"RequestId":"578b7fea-0bbf-4be4-a01d-8338b7b8aafc","HTTPStatusCode":200,"HTTPHeaders":{"content-type":"application\/x-amz-json-1.1","date":"Fri, 30 Oct 2020 16:02:42 GMT","x-amzn-requestid":"578b7fea-0bbf-4be4-a01d-8338b7b8aafc","content-length":"374","connection":"keep-alive"},"RetryAttempts":0}}}