From 0dfde78e0cc06eb3480b8339b2e223c50c7f6f32 Mon Sep 17 00:00:00 2001
From: MaertHaekkinen
Date: Sat, 26 Oct 2024 21:26:40 +0300
Subject: [PATCH] Update Muis management commands

---
 .../fix_missing_images_for_muis_imports.py    |  25 ++-
 .../commands/import_photos_from_muis.py       | 206 +++++++++++-------
 .../management/commands/refresh_albums.py     |   8 +-
 .../management/commands/update_muis_photos.py | 101 +++++----
 .../0030_alter_applicationexception_photo.py  |  19 ++
 ajapaik/ajapaik/models.py                     |  60 +----
 ajapaik/ajapaik/muis_utils.py                 | 151 +++++++------
 7 files changed, 308 insertions(+), 262 deletions(-)
 create mode 100644 ajapaik/ajapaik/migrations/0030_alter_applicationexception_photo.py

diff --git a/ajapaik/ajapaik/management/commands/fix_missing_images_for_muis_imports.py b/ajapaik/ajapaik/management/commands/fix_missing_images_for_muis_imports.py
index 145880373..467c486a6 100644
--- a/ajapaik/ajapaik/management/commands/fix_missing_images_for_muis_imports.py
+++ b/ajapaik/ajapaik/management/commands/fix_missing_images_for_muis_imports.py
@@ -34,14 +34,14 @@ def handle(self, *args, **options):
             photos = Photo.objects.filter(rephoto_of=None, back_of_id=None, width=None, height=None,
                                           external_id__startswith='oai:muis.ee')
         else:
-            print("Please add photo ids that you want to update")
+            logger.info("Please add photo ids that you want to update")

         if not photos:
-            print("No photos found to update")
+            logger.info("No photos found to update")

         muis_url = 'https://www.muis.ee/OAIService/OAIService'
         for photo in photos:
-            print('Running update for: {photo.id}')
+            logger.info(f'Running update for: {photo.id}')
             list_identifiers_url = f'{muis_url}?verb=GetRecord&identifier={photo.external_id}&metadataPrefix=lido'
             url_response = urllib.request.urlopen(list_identifiers_url)

@@ -59,28 +59,28 @@ def handle(self, *args, **options):
                 link_resource_record = rec.find(f'{resource_wrap}lido:resourceSet/lido:{rp_lr}', ns)

                 if not photo.external_id.startswith('oai:muis.ee'):
-                    print(f'Skipping, not a muis photo, {photo.id} ({photo.external_id})')
+                    logger.info(f'Skipping, not a muis photo, {photo.id} ({photo.external_id})')
+                    continue

                 if link_resource_record is None:
-                    print(f"Skipping {photo.id} ({photo.external_id}), as there is no image resource")
+                    logger.info(f"Skipping {photo.id} ({photo.external_id}), as there is no image resource")
                     continue

                 image_url = link_resource_record.text
-                if link_resource_record is None:
-                    print(f"Skipping {photo.id} ({photo.external_id}), as there is not image extension specified")
+                image_extension = link_resource_record.attrib.get('{' + ns['lido'] + '}formatResource', '').lower()
+                if not image_extension:
+                    logger.info(f"Skipping {photo.id} ({photo.external_id}), as there is no image extension specified")
                     continue
-                image_extension = (link_resource_record.attrib['{' + ns['lido'] + '}formatResource']).lower()

                 if not image_url or image_extension not in ['gif', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'webp']:
-                    print(
+                    logger.info(
                         f"Skipping {photo.id} ({photo.external_id}), as there are no photos which are supported")
                     continue

                 response = requests.get(image_url)
                 if response.status_code != 200:
-                    print(
+                    logger.info(
                         f"Skipping {photo.id} ({photo.external_id}), as we did not get a valid response when downloading")
                     continue

@@ -90,12 +90,15 @@ def handle(self, *args, **options):
                 with open(f'{MEDIA_ROOT}/{photo.image.name}', 'wb') as handler:
                     handler.write(img_data)
-                print(f'{MEDIA_ROOT}/{photo.image.name}')
+                logger.info(f'{MEDIA_ROOT}/{photo.image.name}')

                 photo = Photo.objects.get(id=photo.id)
                 photo.set_calculated_fields()
-                print(f'Updated image file for {photo.id} ({photo.external_id})')
+                # This is weird, but it makes the image dimensions update on save;
+                # just calling .save() might not work.
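+                # Accessing ImageField width/height forces Django to re-open the
+                # file and re-read its dimensions, so the values persisted by the
+                # save() below reflect the freshly written image.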
+                logger.info(f'{photo.image.name}: {photo.image.width}x{photo.image.height}')
+                photo.save()
+                logger.info(f'Updated image file for {photo.id} ({photo.external_id})')
             except Exception as e:
-                print(e)
                 logger.exception(e)
                 exception = ApplicationException(exception=traceback.format_exc(), photo=photo)
                 exception.save()
diff --git a/ajapaik/ajapaik/management/commands/import_photos_from_muis.py b/ajapaik/ajapaik/management/commands/import_photos_from_muis.py
index 6ef8de6e4..4cf9ce6f9 100644
--- a/ajapaik/ajapaik/management/commands/import_photos_from_muis.py
+++ b/ajapaik/ajapaik/management/commands/import_photos_from_muis.py
@@ -1,18 +1,18 @@
 import logging
 import time
 import traceback
-import urllib
 import xml.etree.ElementTree as ET
-from datetime import datetime, timezone, timedelta
-from urllib.parse import quote
+from datetime import datetime, timedelta
+from urllib.parse import quote_plus

 import requests
 from django.conf import settings
 from django.core.management.base import BaseCommand
+from django.utils.timezone import now

-from ajapaik.ajapaik.models import Album, AlbumPhoto, Dating, Photo, Source, ApplicationException
+from ajapaik.ajapaik.models import Album, AlbumPhoto, Photo, Source, ApplicationException
 from ajapaik.ajapaik.muis_utils import add_person_albums, extract_dating_from_event, add_dating_to_photo, \
-    add_geotag_from_address_to_photo, get_muis_date_and_prefix, set_text_fields_from_muis, reset_modeltranslated_field
+    add_geotag_from_address_to_photo, get_muis_date_and_prefix, set_text_fields_from_muis, reset_photo_translated_field
 from ajapaik.ajapaik.utils import ImportBlacklistService

@@ -36,23 +36,46 @@ def handle(self, *args, **options):
         muis_url = 'https://www.muis.ee/OAIService/OAIService'
         set_name = (options['set_name'])[0]
         museum_name = set_name.split(':')[0]
-        source = Source.objects.filter(name=museum_name).first()
         import_blacklist_service = ImportBlacklistService()
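+        # 'd' is the OAI-PMH envelope namespace and 'lido' the record payload;
+        # the find()/findall() paths below only resolve with these prefixes.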
+        ns = {'d': 'http://www.openarchives.org/OAI/2.0/', 'lido': 'http://www.lido-schema.org',
+              'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}

-        if source is None:
-            sets_url = f'{muis_url}?verb=ListSets'
-            url_response = urllib.request.urlopen(sets_url)
-            parser = ET.XMLParser(encoding="utf-8")
-            tree = ET.fromstring(url_response.read(), parser=parser)
-            ns = {'d': 'http://www.openarchives.org/OAI/2.0/'}
-            sets = tree.findall('d:ListSets/d:set', ns)
-            for s in sets:
-                if s.find('d:setSpec', ns).text == museum_name:
-                    source_description = s.find('d:setName', ns).text
-                    Source.objects.create(name=museum_name, description=source_description)
-                    break
-
-            source = Source.objects.filter(name=museum_name).first()
+        sets_url = f'{muis_url}?verb=ListSets'
+        sets_response = requests.get(sets_url)
+
+        if sets_response.status_code != 200:
+            logger.info(f'Failed to get a response from the MUIS API (status {sets_response.status_code}), {sets_url}')
+            return
+
+        if sets_response.text == '':
+            logger.info(f'No results from the MUIS API, {sets_url}')
+            return
+
+        tree = ET.fromstring(sets_response.text)
+        sets = tree.findall('d:ListSets/d:set', ns)
+
+        if not sets:
+            logger.info('Did not find any sets to match with')
+            return
+
+        source = None
+        set_exists = False
+        for s in sets:
+            if s.find('d:setSpec', ns).text == museum_name:
+                logger.info(f"Found {museum_name}")
+                museum_name = s.find('d:setName', ns).text
+                source = Source.objects.filter(name=museum_name, description=museum_name).first()
+
+                if not source:
+                    source = Source.objects.create(name=museum_name, description=museum_name)
+
+            if s.find('d:setSpec', ns).text == options['set_name'][0]:
+                set_exists = True
+                break
+
+        if not set_exists:
+            logger.info(f"Did not find set {options['set_name'][0]}")
+            return

         dates = []
         start = datetime(2008, 3, 1)
@@ -72,74 +95,99 @@ def handle(self, *args, **options):
                 until_date = from_date
                 continue

-            list_identifiers_url = f'{muis_url}?verb=ListRecords&set={quote(set_name)}&from={from_date}' + \
+            list_identifiers_url = f'{muis_url}?verb=ListRecords&set={quote_plus(set_name)}&from={from_date}' + \
                                    f'&until={until_date}&metadataPrefix=lido'
-            url_response = urllib.request.urlopen(list_identifiers_url)
-            parser = ET.XMLParser(encoding="utf-8")
-            redurl = url_response.read()
-            tree = ET.fromstring(redurl, parser=parser)
-            ns = {'d': 'http://www.openarchives.org/OAI/2.0/', 'lido': 'http://www.lido-schema.org'}
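+            # The next window's 'until' is this window's 'from', so successive
+            # iterations walk the harvest ranges without gaps.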
+            until_date = from_date
+            response = requests.get(list_identifiers_url)
+
+            if response.status_code != 200:
+                logger.info(f'Failed to get a response from the MUIS API (status {response.status_code}), '
+                            f'{list_identifiers_url}')
+                continue
+
+            if response.text == '':
+                logger.info(f'No results from the MUIS API, {list_identifiers_url}')
+                continue
+
+            tree = ET.fromstring(response.text)
             header = 'd:header/'
             records = tree.findall('d:ListRecords/d:record', ns)
-            record = 'd:metadata/lido:lidoWrap/lido:lido/'
-            object_identification_wrap = f'{record}lido:descriptiveMetadata/lido:objectIdentificationWrap/'
-            object_description_wraps = \
-                f'{object_identification_wrap}lido:objectDescriptionWrap/lido:objectDescriptionSet'
-            title_wrap = f'{object_identification_wrap}lido:titleWrap/'
-            repository_wrap = f'{object_identification_wrap}lido:repositoryWrap/'
-            event_wrap = f'{record}lido:descriptiveMetadata/lido:eventWrap/'
-            record_wrap = f'{record}lido:administrativeMetadata/lido:recordWrap/'
-            resource_wrap = f'{record}lido:administrativeMetadata/lido:resourceWrap/'
-            actor_wrap = f'{event_wrap}lido:eventSet/lido:event/lido:eventActor/'
-
-            for rec in records:
+            record_tag = 'd:metadata/lido:lidoWrap/lido:lido/'
+            object_identification_tag = f'{record_tag}lido:descriptiveMetadata/lido:objectIdentificationWrap/'
+            object_description_tags = \
+                f'{object_identification_tag}lido:objectDescriptionWrap/lido:objectDescriptionSet'
+            title_tag = f'{object_identification_tag}lido:titleWrap/'
+            repository_tag = f'{object_identification_tag}lido:repositoryWrap/'
+            event_tag = f'{record_tag}lido:descriptiveMetadata/lido:eventWrap/'
+            resource_tag = f'{record_tag}lido:administrativeMetadata/lido:resourceWrap/'
+            record_wrap_tag = f'{record_tag}lido:administrativeMetadata/lido:recordWrap/'
+            actor_tag = f'{event_tag}lido:eventSet/lido:event/lido:eventActor/'
+
+            for record in records:
+                photo = None
+                logger.info("Found record")
                 try:
                     locations = []
                     person_album_ids = []
-                    identifier_record = rec.find(f'{header}d:identifier', ns)
-                    external_id = identifier_record.text if identifier_record else None
+                    identifier_record = record.find(f'{header}d:identifier', ns)
+                    # We cannot check identifier_record for truthiness, as an Element
+                    # is falsy when it has no children. That forces us to write these
+                    # ugly `is not None` checks.
+                    external_id = identifier_record.text if identifier_record is not None else None

                     existing_photo = Photo.objects.filter(external_id=external_id).first()
                     if existing_photo:
                         continue

-                    rp_lr = 'resourceRepresentation/lido:linkResource'
-                    link_resource_record = rec.find(f'{resource_wrap}lido:resourceSet/lido:{rp_lr}', ns)
-                    image_url = link_resource_record.text if link_resource_record else None
+                    rp_lr = 'lido:resourceRepresentation/lido:linkResource'
+                    link_resource_record = record.find(f'{resource_tag}lido:resourceSet/{rp_lr}', ns)
+                    image_url = link_resource_record.text if link_resource_record is not None else None
+
+                    if not image_url:
+                        logger.info("No image url, skipping")
+                        continue
+
+                    file_extension = (link_resource_record.attrib['{' + ns['lido'] + '}formatResource']).lower()

-                    if link_resource_record:
-                        image_extension = (link_resource_record.attrib['{' + ns['lido'] + '}formatResource']).lower()
-                    else:
-                        image_extension = None
+                    if file_extension not in ['gif', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'webp']:
+                        logger.info("Skipping as there are no photos which are supported")
+                        continue

-                    source_url_find = rec.find(f'{record_wrap}lido:recordInfoSet/lido:recordInfoLink', ns)
+                    source_url_find = record.find(f'{record_wrap_tag}lido:recordInfoSet/lido:recordInfoLink', ns)
                     source_url = source_url_find.text \
                         if source_url_find is not None \
                         else None

-                    identifier_find = rec.find(f'{repository_wrap}lido:repositorySet/lido:workID', ns)
-                    identifier = identifier_find.text if identifier_find else None
+                    if not source_url:
+                        logger.warning('Found a record without a source URL! Skipping')
+                        continue

-                    if identifier and import_blacklist_service.is_blacklisted(identifier):
-                        logger.info(f'Skipping {identifier} as it is blacklisted.')
-                        continue
+                    identifier_find = record.find(f'{repository_tag}lido:repositorySet/lido:workID', ns)
+                    identifier = identifier_find.text if identifier_find is not None else None

-                    if not image_url or image_extension not in ['gif', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'webp']:
-                        logger.info(f"Skipping {identifier}, as there are no photos which are supported")
+                    if not identifier:
+                        logger.warning('Found a record without an identifier! Skipping')
                         continue

+                    if import_blacklist_service.is_blacklisted(identifier):
+                        logger.info(f'Skipping {identifier} as it is blacklisted.')
+                        continue
+
                     response = requests.get(image_url)
                     if response.status_code != 200:
                         logger.info(f"Skipping {identifier}, as we did not get a valid response when downloading")
+                        continue

+                    logger.info("Image downloaded, creating the photo")
                     img_data = response.content
                     image_id = external_id.split(':')[-1]
-                    file_name = f'{set_name}_{image_id}.{image_extension}'
+                    file_name = f'{set_name}_{image_id}.{file_extension}'
                     file_name = file_name.replace(':', '_')
                     path = f'{settings.MEDIA_ROOT}/uploads/{file_name}'
+
                     with open(path, 'wb') as handler:
                         handler.write(img_data)
+
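+                    # The Photo is first saved with the absolute file path so it gets
+                    # an id, then re-fetched and its image name rewritten relative to
+                    # MEDIA_ROOT ('uploads/...') before the metadata fields are filled.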
                     photo = Photo(
                         image=path,
                         source_key=identifier,
                         source_url=source_url,
                         external_id=external_id,
                         source=source
                     )
-                    dt = datetime.utcnow()
-                    photo.muis_update_time = dt.replace(tzinfo=timezone.utc).isoformat()
+                    photo.muis_update_time = now()
                     photo.light_save()
+                    logger.info("Saving photo")
                     photo = Photo.objects.get(id=photo.id)
                     photo.image.name = f'uploads/{file_name}'

-                    title_find = rec.find(f'{title_wrap}lido:titleSet/lido:appellationValue', ns)
+                    title_find = record.find(f'{title_tag}lido:titleSet/lido:appellationValue', ns)
                     title = title_find.text \
                         if title_find is not None \
                         else None
                     if title:
-                        photo = reset_modeltranslated_field(photo, 'title', title)
+                        photo = reset_photo_translated_field(photo, 'title', title)

-                    dating = None
-                    photo, dating = set_text_fields_from_muis(photo, dating, rec, object_description_wraps, ns)
+                    photo, dating = set_text_fields_from_muis(photo, record, object_description_tags, ns)
                     photo.light_save()

                     creation_date_earliest = None
                     creation_date_latest = None
                     date_prefix_earliest = None
                     date_prefix_latest = None
-                    date_earliest_has_suffix = False
                     date_latest_has_suffix = False

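+                    # extract_dating_from_event parses dates out of the events only
+                    # when the record carried no explicit 'dateering' (dating) note;
+                    # hence skip_dating=bool(dating) below.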
-                    events = rec.findall(f'{event_wrap}lido:eventSet/lido:event', ns)
-                    if events and len(events) > 0:
+                    events = record.findall(f'{event_tag}lido:eventSet/lido:event', ns)
+
+                    if events:
                         (locations,
                          creation_date_earliest,
                          creation_date_latest,
                          date_prefix_earliest,
                          date_prefix_latest,
-                         date_earliest_has_suffix,
                          date_latest_has_suffix) = extract_dating_from_event(
                             events,
                             locations,
                             creation_date_earliest,
                             creation_date_latest,
-                            photo.latest_dating is not None or dating is not None,
+                            bool(dating),
                             ns
                         )
+
                     if dating:
-                        creation_date_earliest, date_prefix_earliest, date_earliest_has_suffix = \
+                        creation_date_earliest, date_prefix_earliest, _ = \
                             get_muis_date_and_prefix(dating, False)
                         creation_date_latest, date_prefix_latest, date_latest_has_suffix = \
                             get_muis_date_and_prefix(dating, True)

-                    actors = rec.findall(f'{actor_wrap}lido:actorInRole', ns)
+                    actors = record.findall(f'{actor_tag}lido:actorInRole', ns)
                     person_album_ids, author = add_person_albums(actors, person_album_ids, ns)
+
                     if author:
                         photo.author = author
+
+                    person_albums = Album.objects.filter(id__in=person_album_ids)
+
+                    for person_album in person_albums:
+                        if not person_album.cover_photo:
+                            person_album.cover_photo = photo
+                        ap = AlbumPhoto(photo=photo, album=person_album, type=AlbumPhoto.FACE_TAGGED)
+                        ap.save()
+                        all_person_album_ids_set.add(person_album.id)
+
                     photo.add_to_source_album()
+
                     if locations and len(locations) > 0:
                         photo = add_geotag_from_address_to_photo(photo, locations)

@@ -208,8 +266,6 @@ def handle(self, *args, **options):
                         creation_date_latest,
                         date_prefix_earliest,
                         date_prefix_latest,
-                        Dating,
-                        date_earliest_has_suffix,
                         date_latest_has_suffix,
                     )
                     photo.light_save()
@@ -220,15 +276,6 @@ def handle(self, *args, **options):
                         ap = AlbumPhoto(photo=photo, album=album, type=AlbumPhoto.CURATED)
                         ap.save()

-                    person_albums = Album.objects.filter(id__in=person_album_ids)
-                    if person_albums is not None:
-                        for person_album in person_albums:
-                            if not person_album.cover_photo:
-                                person_album.cover_photo = photo
-                            ap = AlbumPhoto(photo=photo, album=person_album, type=AlbumPhoto.FACE_TAGGED)
-                            ap.save()
-                            all_person_album_ids_set.add(person_album.id)
-
                     photo.set_calculated_fields()
                 except Exception as e:
                     logger.exception(e)
@@ -236,7 +283,6 @@ def handle(self, *args, **options):
                     exception.save()

             time.sleep(1)
-            until_date = from_date

         for album in albums:
             album.light_save()
diff --git a/ajapaik/ajapaik/management/commands/refresh_albums.py b/ajapaik/ajapaik/management/commands/refresh_albums.py
index fc927fa02..3f195a8d6 100644
--- a/ajapaik/ajapaik/management/commands/refresh_albums.py
+++ b/ajapaik/ajapaik/management/commands/refresh_albums.py
@@ -1,5 +1,5 @@
-from random import randint
 import time
+from random import randint

 from django.contrib.gis.geos import Point
 from django.core.management.base import BaseCommand
@@ -11,10 +11,11 @@ class Command(BaseCommand):
     help = 'Refresh albums'

     def handle(self, *args, **options):
         albums = Album.objects.exclude(atype__in=[Album.AUTO, Album.FAVORITES])
+
         for a in albums:
             historic_photo_qs = a.get_historic_photos_queryset_with_subalbums()
-            if not historic_photo_qs.exists():
+            if not historic_photo_qs:
                 continue

             geotagged_photo_qs = a.get_geotagged_historic_photo_queryset_with_subalbums()
@@ -35,6 +36,7 @@ def handle(self, *args, **options):
             random_index = randint(0, historic_photo_qs.count() - 1)
             random_photo = historic_photo_qs[random_index]
             a.cover_photo = random_photo
+
             if random_photo.flip:
                 a.cover_photo_flipped = random_photo.flip
diff --git a/ajapaik/ajapaik/management/commands/update_muis_photos.py b/ajapaik/ajapaik/management/commands/update_muis_photos.py
index 31212fd99..d0ed5469e 100644
--- a/ajapaik/ajapaik/management/commands/update_muis_photos.py
+++ b/ajapaik/ajapaik/management/commands/update_muis_photos.py
@@ -1,29 +1,39 @@
-from datetime import datetime
-from datetime import timezone
-import urllib
+import logging
+import xml.etree.ElementTree as ET

+import requests
 from django.core.management.base import BaseCommand
+from django.utils.timezone import now

+from ajapaik.ajapaik.models import Album, AlbumPhoto, Photo, ApplicationException
 from ajapaik.ajapaik.muis_utils import add_dating_to_photo, add_person_albums, add_geotag_from_address_to_photo, \
-    extract_dating_from_event, get_muis_date_and_prefix, set_text_fields_from_muis, reset_modeltranslated_field
-from ajapaik.ajapaik.models import Album, AlbumPhoto, Dating, Photo, ApplicationException
-import xml.etree.ElementTree as ET
+    extract_dating_from_event, get_muis_date_and_prefix, set_text_fields_from_muis, reset_photo_translated_field


 class Command(BaseCommand):
     help = 'Update photos from MUIS'

     def handle(self, *args, **options):
+        logger = logging.getLogger(__name__)
+
         muis_url = 'https://www.muis.ee/OAIService/OAIService'
         all_person_album_ids_set = set()
         photos = Photo.objects.filter(source_url__contains='muis.ee')

         for photo in photos:
             try:
-                parser = ET.XMLParser(encoding="utf-8")
                 list_identifiers_url = f'{muis_url}?verb=GetRecord&identifier={photo.external_id}&metadataPrefix=lido'
-                url_response = urllib.request.urlopen(list_identifiers_url)
-                tree = ET.fromstring(url_response.read(), parser=parser)
+                response = requests.get(list_identifiers_url)
+
+                if response.status_code != 200:
+                    logger.info(f'Failed to get a response from the MUIS API (status {response.status_code}), '
+                                f'{list_identifiers_url}')
+                    continue
+
+                if response.text == '':
+                    logger.info(f'No results from the MUIS API, {list_identifiers_url}')
+                    continue
+
+                tree = ET.fromstring(response.text)
                 ns = {'d': 'http://www.openarchives.org/OAI/2.0/', 'lido': 'http://www.lido-schema.org'}
                 rec = tree.find('d:GetRecord/d:record', ns)
@@ -41,36 +51,34 @@ def handle(self, *args, **options):
                 title = title_find.text \
                     if title_find is not None \
                     else None
-                photo = reset_modeltranslated_field(photo, 'title', title)
-                photo.light_save()
-                dating = None
-                photo, dating = set_text_fields_from_muis(photo, dating, rec, object_description_wraps, ns)
-                photo.light_save()
+                photo = reset_photo_translated_field(photo, 'title', title)
+
+                photo, dating = set_text_fields_from_muis(photo, rec, object_description_wraps, ns)

                 creation_date_earliest = None
                 creation_date_latest = None
                 date_prefix_earliest = None
                 date_prefix_latest = None
-                date_earliest_has_suffix = False
                 date_latest_has_suffix = False
                 location = []

                 events = rec.findall(f'{event_wrap}lido:eventSet/lido:event', ns)
-                existing_dating = Dating.objects.filter(photo=photo, profile=None).first()
-                if events is not None and len(events) > 0:
+                existing_dating = photo.datings.filter(profile=None).first()
+                has_new_dating_data = dating is not None and not existing_dating
+
+                if events:
                     location, \
-                    creation_date_earliest, \
-                    creation_date_latest, \
-                    date_prefix_earliest, \
-                    date_prefix_latest, \
-                    date_earliest_has_suffix, \
-                    date_latest_has_suffix, \
+                        creation_date_earliest, \
+                        creation_date_latest, \
+                        date_prefix_earliest, \
+                        date_prefix_latest, \
+                        date_latest_has_suffix, \
                         = extract_dating_from_event(
-                        events,
-                        location,
-                        creation_date_earliest,
-                        creation_date_latest,
-                        dating is not None and existing_dating is None,
-                        ns
-                    )
+                            events,
+                            location,
+                            creation_date_earliest,
+                            creation_date_latest,
+                            has_new_dating_data,
+                            ns
+                        )
-                if dating is not None and existing_dating is None:
+                if has_new_dating_data:
                     creation_date_earliest, date_prefix_earliest, date_earliest_has_suffix = \
                         get_muis_date_and_prefix(dating, False)
@@ -79,38 +87,39 @@ def handle(self, *args, **options):

                 actors = rec.findall(f'{actor_wrap}lido:actorInRole', ns)
                 person_album_ids, author = add_person_albums(actors, person_album_ids, ns)
+
                 if author is not None:
                     photo.author = author
-                if location != []:
+
+                if location:
                     photo = add_geotag_from_address_to_photo(photo, location)
+
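+                # Link the photo to every person album it was face-tagged into and
+                # collect the ids so all person albums can be refreshed after the loop.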
+                person_albums = Album.objects.filter(id__in=person_album_ids)
+                for album in person_albums:
+                    if not album.cover_photo:
+                        album.cover_photo = photo
+                    ap = AlbumPhoto(photo=photo, album=album, type=AlbumPhoto.FACE_TAGGED)
+                    ap.save()
+                    all_person_album_ids_set.add(album.id)
+
+                photo.set_calculated_fields()
+
                 photo = add_dating_to_photo(
                     photo,
                     creation_date_earliest,
                     creation_date_latest,
                     date_prefix_earliest,
                     date_prefix_latest,
-                    Dating,
-                    date_earliest_has_suffix,
                     date_latest_has_suffix
                 )
-                dt = datetime.utcnow()
-                dt.replace(tzinfo=timezone.utc)
-                photo.muis_update_time = dt.replace(tzinfo=timezone.utc).isoformat()
+                photo.muis_update_time = now()
                 photo.light_save()

-                person_albums = Album.objects.filter(id__in=person_album_ids)
-                if person_albums is not None:
-                    for album in person_albums:
-                        if not album.cover_photo:
-                            album.cover_photo = photo
-                        ap = AlbumPhoto(photo=photo, album=album, type=AlbumPhoto.FACE_TAGGED)
-                        ap.save()
-
-                        all_person_album_ids_set.add(album.id)
-                photo.set_calculated_fields()
             except Exception as e:
+                logger.exception(e)
                 exception = ApplicationException(exception=e, photo=photo)
                 exception.save()
+
         all_person_album_ids = list(all_person_album_ids_set)
         all_person_albums = Album.objects.filter(id__in=all_person_album_ids)
diff --git a/ajapaik/ajapaik/migrations/0030_alter_applicationexception_photo.py b/ajapaik/ajapaik/migrations/0030_alter_applicationexception_photo.py
new file mode 100644
index 000000000..c566054d5
--- /dev/null
+++ b/ajapaik/ajapaik/migrations/0030_alter_applicationexception_photo.py
@@ -0,0 +1,19 @@
+# Generated by Django 4.2.16 on 2024-10-25 23:57
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('ajapaik', '0029_delete_action'),
+    ]
+
+    operations = [
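+        # ApplicationException rows can now be created before a Photo exists
+        # (photo=None), so the foreign key must be nullable.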
+        migrations.AlterField(
+            model_name='applicationexception',
+            name='photo',
+            field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='ajapaik.photo'),
+        ),
+    ]
diff --git a/ajapaik/ajapaik/models.py b/ajapaik/ajapaik/models.py
index 5c04a1663..b665fc77d 100644
--- a/ajapaik/ajapaik/models.py
+++ b/ajapaik/ajapaik/models.py
@@ -211,6 +211,7 @@ class AlbumPhoto(Model):

     class Meta:
         db_table = 'project_albumphoto'
+        # FIXME: May be causing bugs elsewhere
         # ordering = ['-created']

@@ -331,12 +332,6 @@ def save(self, *args, **kwargs):
                 self.subalbum_of.save()
         connections['default'].get_unified_index().get_index(Album).update_object(self)

-    def get_historic_photos_queryset_with_subalbums_old(self):
-        qs = self.photos.filter(rephoto_of__isnull=True)
-        for sa in self.subalbums.filter(atype__in=[Album.CURATED, Album.PERSON]):
-            qs = qs | sa.photos.filter(rephoto_of__isnull=True)
-        return qs.distinct('id')
-
     def get_historic_photos_queryset_with_subalbums(self):
         sa_ids = [self.id]
         for sa in self.subalbums.filter(atype__in=[Album.CURATED, Album.PERSON]):
@@ -345,35 +340,16 @@ def get_historic_photos_queryset_with_subalbums(self):
             albumphoto__album__in=sa_ids)
         return qs.distinct('id')

-    def get_geotagged_historic_photo_queryset_with_subalbums_old(self):
-        qs = self.photos.filter(rephoto_of__isnull=True, lat__isnull=False, lon__isnull=False)
-        for sa in self.subalbums.filter(atype__in=[Album.CURATED, Album.PERSON]):
-            qs = qs | sa.photos.filter(rephoto_of__isnull=True, lat__isnull=False, lon__isnull=False)
-        return qs.distinct('id')
-
     def get_geotagged_historic_photo_queryset_with_subalbums(self):
         qs = self.get_historic_photos_queryset_with_subalbums().filter(lat__isnull=False, lon__isnull=False)
         return qs.distinct('id')

-    def get_rephotos_queryset_with_subalbums_old(self):
-        qs = self.get_all_photos_queryset_with_subalbums_old().filter(rephoto_of__isnull=False)
-        return qs.distinct('pk')
-
     def get_rephotos_queryset_with_subalbums(self):
         historic_photo_qs = self.get_historic_photos_queryset_with_subalbums().only('id').order_by()
         qs = Photo.objects.filter(rephoto_of__isnull=False,
                                   rephoto_of__in=historic_photo_qs.values('id').order_by()).order_by()
         return qs.distinct('pk')

-    def get_all_photos_queryset_with_subalbums_old(self):
-        qs = self.photos.all()
-        for sa in self.subalbums.filter(atype__in=[Album.CURATED, Album.PERSON]):
-            qs = qs | sa.photos.all()
-
-        photo_ids = qs.values_list('pk', flat=True)
-        qs = qs | Photo.objects.filter(rephoto_of__isnull=False, rephoto_of_id__in=photo_ids)
-        return qs.distinct('pk')
-
     # All photos = historical photos + rephotos
     def get_all_photos_queryset_with_subalbums(self):
         historic_photo_qs = self.get_historic_photos_queryset_with_subalbums().order_by()
@@ -388,14 +364,6 @@ def get_all_photos_queryset_with_subalbums(self):
         qs = Photo.objects.filter(id__in=historic_photo_list)
         return qs.distinct('pk')

-    def get_comment_count_with_subalbums_old(self):
-        qs = self.get_all_photos_queryset_with_subalbums_old().filter(comment_count__gt=0).order_by()
-        count = 0
-        for each in qs:
-            count += each.comment_count
-
-        return count
-
     def get_comment_count_with_subalbums(self):
         historic_photo_qs = self.get_historic_photos_queryset_with_subalbums() \
             .filter(Q(comment_count__gt=0) | Q(first_rephoto__isnull=False)) \
@@ -416,13 +384,6 @@ def get_comment_count_with_subalbums(self):
         else:
             return count

-    def get_confirmed_similar_photo_count_with_subalbums_old(self):
-        qs = self.get_all_photos_queryset_with_subalbums_old().order_by()
-        photo_ids = qs.values_list('pk', flat=True)
-        count = ImageSimilarity.objects.filter(
-            from_photo__in=photo_ids, confirmed=True).only('pk').distinct('pk').order_by().count()
-        return count
-
     def get_confirmed_similar_photo_count_with_subalbums(self):
         qs = self.get_all_photos_queryset_with_subalbums().order_by()
         photo_ids = qs.values_list('pk', flat=True)
@@ -430,29 +391,12 @@ def get_confirmed_similar_photo_count_with_subalbums(self):
             from_photo__in=photo_ids, confirmed=True).only('pk').distinct('pk').order_by().count()
         return count

-    def get_similar_photo_count_with_subalbums_old(self):
-        qs = self.get_all_photos_queryset_with_subalbums_old().order_by()
-        photo_ids = qs.values_list('pk', flat=True)
-        count = ImageSimilarity.objects.filter(from_photo__in=photo_ids).only('pk').distinct('pk').order_by().count()
-        return count
-
     def get_similar_photo_count_with_subalbums(self):
         qs = self.get_all_photos_queryset_with_subalbums().order_by()
         photo_ids = qs.values_list('pk', flat=True)
         count = ImageSimilarity.objects.filter(from_photo__in=photo_ids).only('pk').distinct('pk').order_by().count()
         return count

-    def set_calculated_fields_old(self):
-        self.photo_count_with_subalbums = self.get_historic_photos_queryset_with_subalbums_old().only(
-            'pk').order_by().count()
-        self.rephoto_count_with_subalbums = self.get_rephotos_queryset_with_subalbums_old().only(
-            'pk').order_by().count()
-        self.geotagged_photo_count_with_subalbums = self.get_geotagged_historic_photo_queryset_with_subalbums_old().only(
-            'pk').order_by().count()
-        self.comments_count_with_subalbums = self.get_comment_count_with_subalbums_old()
-        self.similar_photo_count_with_subalbums = self.get_similar_photo_count_with_subalbums_old()
-        self.confirmed_similar_photo_count_with_subalbums = self.get_confirmed_similar_photo_count_with_subalbums_old()
-
     def set_calculated_fields_new(self):
         self.photo_count_with_subalbums = self.get_historic_photos_queryset_with_subalbums().only(
             'pk').order_by().count()
@@ -1902,7 +1846,7 @@ class MuisCollection(Model):
 class ApplicationException(Model):
     exception = TextField(_('Title'), null=True, blank=True)
     external_id = CharField(max_length=100, null=True, blank=True)
-    photo = ForeignKey('Photo', on_delete=CASCADE)
+    photo = ForeignKey('Photo', on_delete=CASCADE, null=True)


 class ImportBlacklist(Model):
diff --git a/ajapaik/ajapaik/muis_utils.py b/ajapaik/ajapaik/muis_utils.py
index 229b26f02..4d57d9f4b 100644
--- a/ajapaik/ajapaik/muis_utils.py
+++ b/ajapaik/ajapaik/muis_utils.py
@@ -6,7 +6,7 @@
 import roman
 from django.conf import settings

-from ajapaik.ajapaik.models import Album, GeoTag, GoogleMapsReverseGeocode, Location, Photo, LocationPhoto
+from ajapaik.ajapaik.models import Album, GeoTag, GoogleMapsReverseGeocode, Location, Photo, LocationPhoto, Dating

 century_suffixes = [
     'saj x',
@@ -36,6 +36,7 @@ def unstructured_date_to_structured_date(date, all_date_prefixes, is_later_date):
     date = date.strip('.').strip().strip('.').strip()
     date = date.lower()
+
     if date in starts_of_century:
         if date[:2].isdigit():
             how_many_hundreds = int(date[:2])
@@ -212,61 +213,62 @@ def unstructured_date_to_structured_date(date, all_date_prefixes, is_later_date):
     return date


-def set_text_fields_from_muis(photo, dating, rec, object_description_wraps, ns):
-    muis_description_field_pairs = {
+def set_text_fields_from_muis(photo, rec, object_description_wraps, ns):
+    dating = None
+    muis_description_field_mapping = {
         'sisu kirjeldus': 'description',
         'kommentaar': 'muis_comment',
         'pealkiri': 'muis_title',
         '': 'muis_legends_and_descriptions',
         'tekst objektil': 'muis_text_on_object',
-        'dateering': None
     }
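+    # Keys are MuIS description types (Estonian): 'sisu kirjeldus' = content
+    # description, 'kommentaar' = comment, 'pealkiri' = title, 'tekst objektil' =
+    # text on the object; the empty key catches untyped legends and descriptions.
+    # 'dateering' (dating) is handled separately in the loop below.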
-    for field in muis_description_field_pairs:
-        if muis_description_field_pairs[field] is not None:
-            photo = reset_modeltranslated_field(photo, muis_description_field_pairs[field], None)
-            photo.save()
-            photo = Photo.objects.get(id=photo.id)
+
+    for value in muis_description_field_mapping.values():
+        photo = reset_photo_translated_field(photo, value)

     description_finds = rec.findall(object_description_wraps, ns)
+
     for description_element in description_finds:
-        description_text_element = description_element.find('lido:descriptiveNoteValue', ns)
         description_type_element = description_element.find('lido:sourceDescriptiveNote', ns)
-        description_text = description_text_element.text
         if description_type_element is None:
             continue
+
         description_type = description_type_element.text
-        if description_type in muis_description_field_pairs:
+        description_text_element = description_element.find('lido:descriptiveNoteValue', ns)
+        description_text = description_text_element.text if description_text_element is not None else None
+
+        if description_type == 'dateering':
+            dating = description_text
+        elif description_type in muis_description_field_mapping:
             if description_type == 'sisu kirjeldus':
-                photo = reset_modeltranslated_field(
-                    photo,
-                    muis_description_field_pairs[description_type],
-                    description_text
-                )
+                photo = reset_photo_translated_field(photo, 'description', description_text)
                 photo.description_original_language = None
-            elif description_type == 'dateering':
-                dating = description_text
             else:
-                setattr(photo, muis_description_field_pairs[description_type], description_text)
+                setattr(photo, muis_description_field_mapping[description_type], description_text)
+
+    photo.light_save()
+
     return photo, dating


-def reset_modeltranslated_field(photo, attribute_name, attribute_value):
-    setattr(photo, attribute_name, attribute_value)
-    photo.light_save()
+def reset_photo_translated_field(photo, attribute_name, attribute_value=None):
     photo = Photo.objects.get(id=photo.id)
     detection_lang = 'et'
+
+    setattr(photo, attribute_name, attribute_value)

     for language in settings.MODELTRANSLATION_LANGUAGES:
         if language == detection_lang:
             setattr(photo, f'{attribute_name}_{language}', attribute_value)
         else:
             setattr(photo, f'{attribute_name}_{language}', None)
+
     photo.light_save()
+
     return photo


 def extract_dating_from_event(
         events,
-        location,
+        locations,
         creation_date_earliest,
         creation_date_latest,
         skip_dating,
@@ -276,12 +278,11 @@ def extract_dating_from_event(
     creation_event_types = ['valmistamine', '', 'pildistamine', 'sõjandus ja kaitse', 'sõjad']
     date_prefix_earliest = None
     date_prefix_latest = None
-    earliest_had_decade_suffix = False
     latest_had_decade_suffix = False
+
     for event in events:
         event_types = event.findall('lido:eventType/lido:term', ns)
-        if event_types is None:
-            continue
+
         for event_type in event_types:
             if event_type.text == duplicate_event_type:
                 break
@@ -292,83 +293,96 @@ def extract_dating_from_event(
             earliest_date = event.find('lido:eventDate/lido:date/lido:earliestDate', ns)
             latest_date = event.find('lido:eventDate/lido:date/lido:latestDate', ns)

-            if earliest_date is not None and earliest_date.text is not None:
-                creation_date_earliest, date_prefix_earliest, earliest_had_decade_suffix, \
+            if earliest_date is not None and earliest_date.text:
+                creation_date_earliest, date_prefix_earliest, _ \
                     = get_muis_date_and_prefix(
                         earliest_date.text, False
                     )
-            if latest_date is not None and latest_date.text is not None:
+            if latest_date is not None and latest_date.text:
                 creation_date_latest, date_prefix_latest, latest_had_decade_suffix \
                     = get_muis_date_and_prefix(
                         latest_date.text, True
                     )

             places = event.findall('lido:eventPlace/lido:place', ns)
-            if places is not None:
-                new_location = []
+
+            def get_place_entity(place):
+                new_locations = []
+                entity_type = place.attrib.get(f"{{{ns['lido']}}}politicalEntity")
+
+                if entity_type is None:
+                    entity_type = place.attrib.get(f"{{{ns['lido']}}}geographicalEntity")
+
+                place_appellation_value = place.find('lido:namePlaceSet/lido:appellationValue', ns)
+                if place_appellation_value is not None:
+                    new_locations.append((place_appellation_value.text, entity_type))
+
+                child = place.find('lido:partOfPlace', ns)
+
+                return new_locations, child
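+            # A lido:place may nest a chain of lido:partOfPlace parents (e.g.
+            # village -> parish -> county); following the 'child' links collects
+            # the name and entity type at every level of that hierarchy.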
+            if places:
                 for place in places:
-                    entity_type = place.attrib['{' + ns['lido'] + '}politicalEntity']
-                    if entity_type is None:
-                        entity_type = place.attrib['{' + ns['lido'] + '}geographicalEntity']
-                    place_appelation_value = place.find('lido:namePlaceSet/lido:appellationValue', ns)
-                    if place_appelation_value is not None:
-                        new_location.append((place_appelation_value.text, entity_type))
-
-                    child = place.find('lido:partOfPlace', ns)
+                    new_locations, child = get_place_entity(place)
+                    locations.extend(new_locations)
+
                     while child is not None:
-                        entity_type = child.attrib['{' + ns['lido'] + '}politicalEntity']
-                        if entity_type is None:
-                            entity_type = child.attrib['{' + ns['lido'] + '}geographicalEntity']
-                        place_appelation_value = child.find('lido:namePlaceSet/lido:appellationValue', ns)
-                        if place_appelation_value is not None:
-                            new_location.append((place_appelation_value.text, entity_type))
-                        child = child.find('lido:partOfPlace', ns)
-                    if new_location != []:
-                        location.append(new_location)
-    return location, creation_date_earliest, creation_date_latest, date_prefix_earliest, date_prefix_latest, \
-        earliest_had_decade_suffix, latest_had_decade_suffix
+                        new_locations, child = get_place_entity(child)
+                        locations.extend(new_locations)
+
+    return (locations, creation_date_earliest, creation_date_latest, date_prefix_earliest, date_prefix_latest,
+            latest_had_decade_suffix)


 def add_person_albums(actors, person_album_ids, ns):
     author = None
+
     for actor in actors:
         term = actor.find('lido:roleActor/lido:term', ns)
         if term is None:
             continue
+
         if term.text in ['kujutatu', 'autor']:
-            muis_actor_wrapper = actor.find("lido:actor/lido:actorID", ns)
-            muis_actor_id = None
-            if muis_actor_wrapper is not None:
-                muis_actor_id = int(muis_actor_wrapper.text)
             names = actor.findall("lido:actor/lido:nameActorSet/lido:appellationValue", ns)
-            all_names = ''
             main_name = ''
+            all_names = ''
+
             for name in names:
-                if name.attrib['{' + ns['lido'] + '}pref'] == 'preferred':
+                if name.attrib[f"{{{ns['lido']}}}pref"] == 'preferred':
                     main_name_parts = name.text.split(',')
                     main_name = main_name_parts[-1].strip()
+
                     if len(main_name_parts) > 1:
                         for part in main_name_parts[0:len(main_name_parts) - 1]:
                             main_name += f' {part}'
-                all_names += (name.attrib['{' + ns['lido'] + '}label'] + ': ' + name.text + '. ').capitalize()
+
+                label = name.attrib[f"{{{ns['lido']}}}label"]
+                all_names += f"{label}: {name.text}. ".capitalize()

             if term.text == 'autor':
                 author = main_name
                 continue

             existing_album = Album.objects.filter(name=main_name, atype=Album.PERSON).first()
-            if existing_album and muis_actor_id:
+            muis_actor_wrapper = actor.find("lido:actor/lido:actorID", ns)
+            muis_actor_id = int(muis_actor_wrapper.text) if muis_actor_wrapper is not None else None
+
+            if existing_album:
+                if muis_actor_id and muis_actor_id not in (existing_album.muis_person_ids or []):
+                    existing_album.muis_person_ids = [*(existing_album.muis_person_ids or []), muis_actor_id]
+                    existing_album.save(update_fields=['muis_person_ids'])
+
                 person_album_ids.append(existing_album.id)
                 continue

             person_album = Album(
                 name=main_name,
                 description=all_names,
-                atype=Album.PERSON
+                atype=Album.PERSON,
+                muis_person_ids=[muis_actor_id] if muis_actor_id else []
             )
-            if muis_actor_id:
-                person_album.muis_person_ids = [muis_actor_id]

             person_album.light_save()
             person_album_ids.append(person_album.id)
@@ -376,13 +390,14 @@ def add_person_albums(actors, person_album_ids, ns):


 def get_muis_date_and_prefix(date, is_later_date):
+    date_prefix = None
+    had_decade_suffix = False
     approximate_date_prefixes = ['ligikaudu', 'arvatav aeg', 'umbes']
     before_date_prefix = 'enne'
     after_date_prefix = 'pärast'
     all_date_prefixes = approximate_date_prefixes + [before_date_prefix, after_date_prefix]
     date = unstructured_date_to_structured_date(date, all_date_prefixes, is_later_date)
-    date_prefix = None
-    had_decade_suffix = False
+
     if date is None:
         return date, date_prefix, had_decade_suffix

@@ -393,14 +408,19 @@ def get_muis_date_and_prefix(date, is_later_date):
     for split in earliest_date_split:
         earliest_date_split[index] = split.strip()
         index += 1
+
     if earliest_date_split[-1] == decade_suffix:
         had_decade_suffix = True
+
     if earliest_date_split[0] in all_date_prefixes:
         date_prefix = earliest_date_split[0]
         earliest_date_split = earliest_date_split[1:]
+
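+    # The split date arrives least-significant-first (e.g. '1.3.1939' ->
+    # ['1', '3', '1939']); reversing puts the year first, and the loops below
+    # zero-pad year, month and day so the result is '1939.03.01'.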
     earliest_date_split.reverse()
+
     while len(earliest_date_split[0]) < 4:
         earliest_date_split[0] = f'0{earliest_date_split[0]}'
+
     if len(earliest_date_split) > 1:
         while len(earliest_date_split[1]) < 2:
             earliest_date_split[1] = f'0{earliest_date_split[1]}'
@@ -408,6 +428,7 @@ def get_muis_date_and_prefix(date, is_later_date):
         while len(earliest_date_split[2]) < 2:
             earliest_date_split[2] = f'0{earliest_date_split[2]}'
     date = '.'.join(earliest_date_split)
+
     if len(earliest_date_split) == 1:
         date = earliest_date_split[0]

@@ -524,8 +545,8 @@ def raw_date_to_date(raw_date):
     return date


-def add_dating_to_photo(photo, earliest_date, latest_date, date_prefix_earliest, date_prefix_latest, Dating,
-                        date_earliest_has_suffix, date_latest_has_suffix):
+def add_dating_to_photo(photo, earliest_date, latest_date, date_prefix_earliest, date_prefix_latest,
+                        date_latest_has_suffix):
     if latest_date is None and earliest_date is None:
         return photo

@@ -533,6 +554,7 @@ def add_dating_to_photo(photo, earliest_date, latest_date, date_prefix_earliest,
         earliest_date = earliest_date.replace('aastad', '').strip('.')
     if latest_date is not None:
         latest_date = latest_date.replace('aastad', '').strip('.')
+
     before_date_prefix = 'enne'
     after_date_prefix = 'pärast'
     approximate_date_prefixes = ['ligikaudu', 'arvatav aeg', 'umbes']
@@ -579,4 +601,5 @@ def add_dating_to_photo(photo, earliest_date, latest_date, date_prefix_earliest,
     photo.first_dating = dating.created
     photo.dating_count += 1
     photo.latest_dating = dating.created
+    photo.save(update_fields=['first_dating', 'dating_count', 'latest_dating'])
     return photo