diff --git a/.coveragerc b/.coveragerc index c48bd17b..af5751ee 100644 --- a/.coveragerc +++ b/.coveragerc @@ -9,3 +9,5 @@ omit = */management/* */tests.py */experiments.py + */conftest.py + */tests/* diff --git a/.gitlab/test-image.yml b/.gitlab/test-image.yml index 73e5933a..2cfde251 100644 --- a/.gitlab/test-image.yml +++ b/.gitlab/test-image.yml @@ -9,5 +9,9 @@ test_image: - docker build --cache-from $CI_REGISTRY_IMAGE/celus-test:latest --tag $CI_REGISTRY_IMAGE/celus-test:latest --target celus-test -f ci/Dockerfile . - docker push $CI_REGISTRY_IMAGE/celus-test:latest + rules: + - if: '$CI_COMMIT_BRANCH == "master"' + when: on_success + tags: - dind diff --git a/apps/activity/tests.py b/apps/activity/tests.py deleted file mode 100644 index 7ce503c2..00000000 --- a/apps/activity/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here. diff --git a/apps/core/logic/sync.py b/apps/core/logic/sync.py index af7a5c1c..ea676ee2 100644 --- a/apps/core/logic/sync.py +++ b/apps/core/logic/sync.py @@ -97,6 +97,7 @@ def sync_data(self, records: [dict]) -> dict: ) } org_ext_id_to_db_obj = {org.ext_id: org for org in Organization.objects.all()} + seen_user_orgs = set() for (org_ext_id, user_ext_id), is_admin in self._org_user_status.items(): uo = org_user_to_db_obj.get((org_ext_id, user_ext_id)) if not uo: @@ -116,6 +117,7 @@ def sync_data(self, records: [dict]) -> dict: user=user, organization=organization, is_admin=is_admin, source=self.data_source ) org_user_to_db_obj[(org_ext_id, user_ext_id)] = uo + seen_user_orgs.add(uo.pk) stats['User-Org created'] += 1 else: if uo.is_admin != is_admin: @@ -124,6 +126,13 @@ def sync_data(self, records: [dict]) -> dict: stats['User-Org synced'] += 1 else: stats['User-Org unchanged'] += 1 + seen_user_orgs.add(uo.pk) + deleted, _details = ( + UserOrganization.objects.filter(source=self.data_source) + .exclude(pk__in=seen_user_orgs) + .delete() + ) + stats['User-Org deleted'] = deleted return stats diff --git a/apps/core/tests/test_sync.py b/apps/core/tests/test_sync.py index 148f1e40..08a386c7 100644 --- a/apps/core/tests/test_sync.py +++ b/apps/core/tests/test_sync.py @@ -6,6 +6,7 @@ from core.logic.sync import sync_identities from core.models import Identity, DataSource from erms.sync import ERMSObjectSyncer +from organizations.models import Organization, UserOrganization from ..logic.sync import sync_users @@ -66,6 +67,33 @@ def test_sync_users_with_removal(self, data_source): assert stats['removed'][0] == 1 assert User.objects.count() == 2 + @pytest.mark.now + def test_sync_users_with_user_org_link_removal(self, data_source): + """ + Test that after a user gets removed from an organization, the link is properly removed + """ + User = get_user_model() + assert User.objects.count() == 0 + Organization.objects.create(ext_id=10, name='Org 1', short_name='org_1') + Organization.objects.create(ext_id=100, name='Org 2', short_name='org_2') + input_data = [ + {'id': 1, 'vals': {'name@cs': ['John Doe']}, 'refs': {'employee of': [10, 100]}}, + ] + stats = sync_users(data_source, input_data) + assert stats['removed'][0] == 0 + assert stats[ERMSObjectSyncer.Status.NEW] == 1 + assert User.objects.count() == 1 + assert UserOrganization.objects.count() == 2 + # now remove one of the organizations + input_data[0]['refs']['employee of'].remove(10) + stats = sync_users(data_source, input_data) + assert stats[ERMSObjectSyncer.Status.NEW] == 0 + assert stats[ERMSObjectSyncer.Status.UNCHANGED] == 1 + assert stats['removed'][0] == 0 + 
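# 'User-Org deleted' is the new stat filled in by the stale-link cleanup in sync_data +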
assert stats['User-Org deleted'] == 1 + assert User.objects.count() == 1 + assert UserOrganization.objects.count() == 1 + @pytest.mark.django_db class TestIdentitySync(object): diff --git a/apps/logs/logic/data_import.py b/apps/logs/logic/data_import.py index b36d438a..7fcc1d4e 100644 --- a/apps/logs/logic/data_import.py +++ b/apps/logs/logic/data_import.py @@ -4,7 +4,7 @@ from typing import Optional from core.logic.debug import log_memory -from logs.logic.validation import clean_and_validate_issn, ValidationError +from logs.logic.validation import clean_and_validate_issn, ValidationError, normalize_isbn from logs.models import ImportBatch from nigiri.counter5 import CounterRecord from organizations.models import Organization @@ -31,14 +31,7 @@ def get_or_create_with_map(model, mapping, attr_name, attr_value, other_attrs=No class TitleManager(object): def __init__(self): - # in the following, we use values_list to speed things up as there are a lot of objects - # and creating them takes a lot of time - # (e.g. processing time for import was cut from 3.5s to 1.2s by switching to this) self.key_to_title_id_and_pub_type = {} - # tuple(t[:5]): tuple(t[5:]) - # for t in Title.objects.all().order_by(). - # values_list('name', 'isbn', 'issn', 'eissn', 'doi', 'pk', 'pub_type') - # } self.stats = Counter() def prefetch_titles(self, records: [TitleRec]): @@ -54,6 +47,29 @@ def prefetch_titles(self, records: [TitleRec]): } logger.debug('Prefetched %d records', len(self.key_to_title_id_and_pub_type)) + @classmethod + def normalize_title_rec(cls, record: TitleRec) -> TitleRec: + """ + Normalize specific fields in the record and return a new TitleRec with normalized data. + Should be run before one attempts to ingest the data into the database. + """ + # normalize issn, eissn and isbn - they are sometimes malformed by whitespace in the data + issn = record.issn + if issn: + issn = clean_and_validate_issn(issn, raise_error=False) + eissn = record.eissn + if eissn: + eissn = clean_and_validate_issn(eissn, raise_error=False) + isbn = normalize_isbn(record.isbn) if record.isbn else record.isbn + return TitleRec( + name=record.name, + isbn=isbn, + issn=issn, + eissn=eissn, + doi=record.doi, + pub_type=record.pub_type, + ) + def get_or_create(self, record: TitleRec) -> Optional[int]: if not record.name: logger.warning( @@ -64,23 +80,7 @@ def get_or_create(self, record: TitleRec) -> Optional[int]: record.doi, ) return None - # normalize issn, eissn and isbn - the are sometimes malformed by whitespace in the data - issn = record.issn - if issn: - try: - issn = clean_and_validate_issn(issn) - except ValidationError as e: - logger.error(f'Error: {e}') - issn = '' - eissn = record.eissn - if eissn: - try: - eissn = clean_and_validate_issn(eissn) - except ValidationError as e: - logger.error(f'Error: {e}') - eissn = '' - isbn = record.isbn.replace(' ', '') if record.isbn else record.isbn - key = (record.name, isbn, issn, eissn, record.doi) + key = (record.name, record.isbn, record.issn, record.eissn, record.doi) if key in self.key_to_title_id_and_pub_type: title_pk, db_pub_type = self.key_to_title_id_and_pub_type[key] # check if we need to improve the pub_type from UNKNOWN to something better @@ -94,9 +94,9 @@ def get_or_create(self, record: TitleRec) -> Optional[int]: title = Title.objects.create( name=record.name, pub_type=record.pub_type, - isbn=isbn, - issn=issn, - eissn=eissn, + isbn=record.isbn, + issn=record.issn, + eissn=record.eissn, doi=record.doi, ) self.key_to_title_id_and_pub_type[key] = (title.pk, 
record.pub_type) @@ -113,11 +113,11 @@ def counter_record_to_title_rec(self, record: CounterRecord) -> TitleRec: if key == 'DOI': doi = value elif key == 'Online_ISSN': - eissn = value + eissn = clean_and_validate_issn(value, raise_error=False) if value else value elif key == 'Print_ISSN': - issn = value + issn = clean_and_validate_issn(value, raise_error=False) if value else value elif key == 'ISBN': - isbn = value + isbn = normalize_isbn(value) if value else value pub_type = self.deduce_pub_type(eissn, isbn, issn, record) # convert None values for the following attrs to empty strings isbn = '' if isbn is None else isbn diff --git a/apps/logs/logic/validation.py b/apps/logs/logic/validation.py index a31a7ee5..760e7d47 100644 --- a/apps/logs/logic/validation.py +++ b/apps/logs/logic/validation.py @@ -1,5 +1,7 @@ +import logging import re +logger = logging.getLogger(__name__) issn_matcher = re.compile(r'^\d{4}-\d{3}[\dX]$') @@ -8,8 +10,15 @@ class ValidationError(Exception): pass -def clean_and_validate_issn(text: str) -> str: +def clean_and_validate_issn(text: str, raise_error=True) -> str: clean = ''.join(text.split()) # remove all whitespace if issn_matcher.match(clean): return clean - raise ValidationError(f'Invalid ISSN: "{text}"') + if raise_error: + raise ValidationError(f'Invalid ISSN: "{text}"') + logger.warning('Invalid ISSN: "%s"', text) + return '' + + +def normalize_isbn(isbn: str) -> str: + return isbn.replace(' ', '') diff --git a/apps/nigiri/tests/test_fetching.py b/apps/nigiri/tests/test_fetching.py index 7ef5eebe..49ac8fa3 100644 --- a/apps/nigiri/tests/test_fetching.py +++ b/apps/nigiri/tests/test_fetching.py @@ -3,13 +3,13 @@ import pytest from nigiri.counter5 import Counter5ReportBase -from organizations.tests.conftest import organizations -from logs.tests.conftest import report_type_nd from nigiri.client import Sushi5Client from publications.models import Platform from sushi.logic.data_import import import_sushi_credentials from sushi.models import SushiCredentials, CounterReportType +from organizations.tests.conftest import organizations +from logs.tests.conftest import report_type_nd @pytest.mark.django_db diff --git a/apps/organizations/migrations/0015_nullable_ext_id.py b/apps/organizations/migrations/0015_nullable_ext_id.py new file mode 100644 index 00000000..8518425a --- /dev/null +++ b/apps/organizations/migrations/0015_nullable_ext_id.py @@ -0,0 +1,20 @@ +# Generated by Django 2.2.13 on 2020-06-25 07:56 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('organizations', '0014_nullable_ico'), + ] + + operations = [ + migrations.AlterField( + model_name='organization', + name='ext_id', + field=models.PositiveIntegerField( + default=None, help_text='object ID taken from ERMS', null=True, unique=True + ), + ), + ] diff --git a/apps/organizations/models.py b/apps/organizations/models.py index abd9f3a4..0f6daebe 100644 --- a/apps/organizations/models.py +++ b/apps/organizations/models.py @@ -1,6 +1,5 @@ from django.conf import settings from django.contrib.postgres.fields import JSONField -from django.core.exceptions import ValidationError from django.db import models from mptt.fields import TreeForeignKey from mptt.models import MPTTModel @@ -10,7 +9,9 @@ class Organization(MPTTModel): - ext_id = models.PositiveIntegerField(unique=True, help_text='object ID taken from EMRS') + ext_id = models.PositiveIntegerField( + unique=True, help_text='object ID taken from ERMS', null=True, default=None + ) parent = 
TreeForeignKey( 'self', on_delete=models.CASCADE, null=True, blank=True, related_name='children' ) diff --git a/apps/organizations/tests/conftest.py b/apps/organizations/tests/conftest.py index f8488fe6..40dda330 100644 --- a/apps/organizations/tests/conftest.py +++ b/apps/organizations/tests/conftest.py @@ -51,7 +51,7 @@ def identity_by_user_type( ): def fn(user_type): org = organizations[0] - # we do not user admin_client, master_client, etc. because the way the fixtures work + # we do not use admin_client, master_client, etc. because the way the fixtures work # they all point to the same client object which obviously does not work if user_type == 'no_user': identity = None diff --git a/apps/organizations/tests/test_api.py b/apps/organizations/tests/test_api.py index 5f7006fb..d6bc5dcc 100644 --- a/apps/organizations/tests/test_api.py +++ b/apps/organizations/tests/test_api.py @@ -15,7 +15,7 @@ class TestOrganizationAPI(object): def test_unauthorized_user(self, client, invalid_identity, authentication_headers): resp = client.get(reverse('organization-list'), **authentication_headers(invalid_identity)) - assert resp.status_code == 403 + assert resp.status_code in (403, 401) # depends on auth backend def test_authorized_user_no_orgs(self, authenticated_client): resp = authenticated_client.get(reverse('organization-list')) diff --git a/apps/publications/tests/test_title_manager.py b/apps/publications/tests/test_title_manager.py new file mode 100644 index 00000000..ba100d3e --- /dev/null +++ b/apps/publications/tests/test_title_manager.py @@ -0,0 +1,24 @@ +import pytest + +from logs.logic.data_import import TitleManager, TitleRec +from publications.models import Title + + +@pytest.mark.django_db +class TestTitleManager(object): + def test_mangled_isbn(self): + """ + Test for a bug where TitleManager looked up data in the database using a non-normalized + ISBN but used the normalized ISBN when storing new data. This discrepancy may lead to + a database level integrity error because of constraints. 
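+ The fix is to run `normalize_title_rec` on the record before any lookup, as this test does.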
+ :return: + """ + Title.objects.create(name='Foo', isbn='978-0-07-174521-5') + tm = TitleManager() + record = TitleRec( + name='Foo', isbn='978- 0-07-174521-5', issn='', eissn='', doi='', pub_type='U' + ) + record = tm.normalize_title_rec(record) + tm.prefetch_titles(records=[record]) + tm.get_or_create(record) diff --git a/apps/sushi/admin.py b/apps/sushi/admin.py index b2c31ea9..a4dffd98 100644 --- a/apps/sushi/admin.py +++ b/apps/sushi/admin.py @@ -79,6 +79,24 @@ def queryset(self, request, queryset): return queryset +class HistoryMode(admin.SimpleListFilter): + title = 'history mode' + parameter_name = 'hm' + + def lookups(self, request, model_admin): + return ( + ('current', 'Current only'), + ('current_and_success', 'Current and successful'), + ) + + def queryset(self, request, queryset): + if self.value() == 'current': + return queryset.current() + if self.value() == 'current_and_success': + return queryset.current_or_successful() + return queryset + + @admin.register(models.SushiFetchAttempt) class SushiFetchAttemptAdmin(admin.ModelAdmin): @@ -100,6 +118,7 @@ class SushiFetchAttemptAdmin(admin.ModelAdmin): 'has_import_batch', ] list_filter = [ + HistoryMode, 'download_success', 'processing_success', 'is_processed', diff --git a/apps/sushi/logic/fetching.py b/apps/sushi/logic/fetching.py index 31a368e4..ab583a3f 100644 --- a/apps/sushi/logic/fetching.py +++ b/apps/sushi/logic/fetching.py @@ -4,11 +4,12 @@ import concurrent.futures import logging import traceback -from collections import Counter +from collections import Counter, namedtuple from datetime import timedelta, date from functools import partial +from itertools import groupby from time import sleep from typing import Optional from dateparser import parse as parse_date from django.conf import settings @@ -351,3 +352,100 @@ def smart_decide_conflict_action(conflict): action = 'skip' # it is too soon to retry logger.debug('Smart deciding to skip attempt - it is too soon to retry') return action + + +def months_to_cover(first_month=None) -> [date]: + """ + List of dates (month starts) for which we should try to get data + """ + last_month = month_start(month_start(now().date()) - timedelta(days=15)) + first_month = first_month or parse_date(settings.SUSHI_ATTEMPT_LAST_DATE + '-01').date() + month = first_month + months_to_check = [] + while month <= last_month: + months_to_check.append(month) + month = month_start(month + timedelta(days=45)) + return months_to_check + + +DataHole = namedtuple( 'DataHole', ['date', 'credentials', 'counter_report', 'attempt_count', 'attempt_with_current_credentials'], ) + + +def find_holes_in_data() -> [DataHole]: + """ + Looks for months for which there should be data but none is present. 
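A month counts as covered when it has a successful or a queued attempt. +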
The result is bound to specific + credentials and report type + :return: + """ + months = months_to_cover() + result = [] + for credentials in SushiCredentials.objects.filter(enabled=True): # type: SushiCredentials + for report_type in credentials.active_counter_reports.all(): + attempts = SushiFetchAttempt.objects.filter( + credentials=credentials, counter_report=report_type + ) + month_to_attempts = { + key: list(group) for key, group in groupby(attempts, lambda x: x.start_date) + } + for month in months: + attempts = month_to_attempts.get(month, []) + # we consider queued attempts successful because they will be tried again + # that is, holes with queued attempts are not holes :) + successful_attempts = [ + attempt for attempt in attempts if attempt.processing_success or attempt.queued + ] + # attempts with the current version of credentials + current_attempts = [ + attempt + for attempt in attempts + if attempt.credentials_version_hash == credentials.version_hash + ] + if not successful_attempts: + result.append( + DataHole( + date=month, + credentials=credentials, + counter_report=report_type, + attempt_count=len(attempts), + attempt_with_current_credentials=bool(current_attempts), + ) + ) + return result + + +def retry_holes_with_new_credentials(sleep_interval=0) -> Counter: + """ + Find holes in data using `find_holes_in_data` and decide if it makes sense to redownload them. + If yes, it does so. + :return: + """ + holes = find_holes_in_data() + logger.debug('Found %d holes to retry', len(holes)) + last_platform = None + stats = Counter() + for i, hole in enumerate(holes): # type: DataHole + cred_based_delay = hole.credentials.when_can_access() + if not hole.attempt_with_current_credentials: + # this is what we want to process - cases when sushi credentials were updated + if cred_based_delay == 0: + # we are ready to retry + logger.debug('Trying to fill hole: %s / %s', hole.credentials, hole.date) + attempt = hole.credentials.fetch_report( + counter_report=hole.counter_report, + start_date=hole.date, + end_date=month_end(hole.date), + use_url_lock=True, + ) + logger.debug('Result: %s', attempt) + stats[f'retry_{attempt.status}'] += 1 + if attempt.credentials.platform_id == last_platform: + sleep(sleep_interval) + last_platform = attempt.credentials.platform_id + else: + logger.debug('Too soon to retry - need %d s', cred_based_delay) + stats['too soon'] += 1 + return stats diff --git a/apps/sushi/migrations/0028_sushifetchattempt_credentials_version_hash.py b/apps/sushi/migrations/0028_sushifetchattempt_credentials_version_hash.py new file mode 100644 index 00000000..17631146 --- /dev/null +++ b/apps/sushi/migrations/0028_sushifetchattempt_credentials_version_hash.py @@ -0,0 +1,62 @@ +# Generated by Django 2.2.12 on 2020-05-22 13:38 +import json +from hashlib import blake2b + +from django.db import migrations +from django.db import models +from django.db.models import F + + +def get_hash(credentials): + keys = { + 'url', + 'counter_version', + 'requestor_id', + 'customer_id', + 'http_username', + 'http_password', + 'api_key', + 'extra_params', + } + data = {key: getattr(credentials, key) for key in keys} + dump = json.dumps(data, ensure_ascii=False, sort_keys=True) + return blake2b(dump.encode('utf-8'), digest_size=16).hexdigest() + + +def fill_credentials_version_hash(apps, schema_editor): + SushiFetchAttempt = apps.get_model('sushi', 'SushiFetchAttempt') + for fa in SushiFetchAttempt.objects.filter( + timestamp__gte=F('credentials__last_updated') + ).select_related('credentials'): + 
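# attempts made at or after the last update of the credentials were made with their current version, so the current hash applies +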
fa.credentials_version_hash = get_hash(fa.credentials) + fa.save() + + +def reverse(apps, schema_editor): + SushiFetchAttempt = apps.get_model('sushi', 'SushiFetchAttempt') + SushiFetchAttempt.objects.all().update(credentials_version_hash='') + + +class Migration(migrations.Migration): + + dependencies = [ + ('sushi', '0027_queue_previous_reverse_name'), + ] + + operations = [ + migrations.AddField( + model_name='sushifetchattempt', + name='credentials_version_hash', + field=models.CharField( + default='', + help_text='Hash computed from the credentials at the time this attempt was made', + max_length=32, + ), + preserve_default=False, + ), + # we are adding the hash of the credentials to attempts based on the last_updated + # date of the credentials. Therefore we need to add it before the credentials + # are modified by having their own hash added + migrations.RunPython(fill_credentials_version_hash, reverse), + ] diff --git a/apps/sushi/migrations/0029_sushicredentials_version_hash.py b/apps/sushi/migrations/0029_sushicredentials_version_hash.py new file mode 100644 index 00000000..ee8bb197 --- /dev/null +++ b/apps/sushi/migrations/0029_sushicredentials_version_hash.py @@ -0,0 +1,55 @@ +# Generated by Django 2.2.12 on 2020-05-25 08:02 +import json +from hashlib import blake2b + +from django.db import migrations, models + + +def get_hash(credentials): + keys = { + 'url', + 'counter_version', + 'requestor_id', + 'customer_id', + 'http_username', + 'http_password', + 'api_key', + 'extra_params', + } + data = {key: getattr(credentials, key) for key in keys} + dump = json.dumps(data, ensure_ascii=False, sort_keys=True) + return blake2b(dump.encode('utf-8'), digest_size=16).hexdigest() + + +def fill_version_hash(apps, schema_editor): + """ + The model available in migrations does not have the usual custom methods 
here, so we use a + local implementation of the hash computation + """ + SushiCredentials = apps.get_model('sushi', 'SushiCredentials') + for credentials in SushiCredentials.objects.all(): + credentials.version_hash = get_hash(credentials) + credentials.save() + + +def noop(apps, schema_editor): + pass + + +class Migration(migrations.Migration): + + dependencies = [ + ('sushi', '0028_sushifetchattempt_credentials_version_hash'), + ] + + operations = [ + migrations.AddField( + model_name='sushicredentials', + name='version_hash', + field=models.CharField( + default='', help_text='Current hash of model attributes', max_length=32 + ), + preserve_default=False, + ), + migrations.RunPython(fill_version_hash, noop), + ] diff --git a/apps/sushi/models.py b/apps/sushi/models.py index 61686628..8dfe9b0d 100644 --- a/apps/sushi/models.py +++ b/apps/sushi/models.py @@ -1,27 +1,28 @@ -import os +import json import logging +import os import traceback from copy import deepcopy -from hashlib import blake2b from datetime import timedelta, datetime +from hashlib import blake2b from itertools import takewhile -from typing import Optional -import json +from typing import Optional, Dict import requests import reversion +from django.conf import settings from django.contrib.postgres.fields import JSONField from django.core.files.base import ContentFile from django.db import models +from django.db.models import F from django.db.transaction import atomic from django.utils.timezone import now from pycounter.exceptions import SushiException - -from django.conf import settings from rest_framework.exceptions import PermissionDenied from core.logic.dates import month_end -from core.models import USER_LEVEL_CHOICES, UL_CONS_ADMIN, UL_ORG_ADMIN, UL_CONS_STAFF, User +from core.models import UL_CONS_ADMIN, UL_ORG_ADMIN, UL_CONS_STAFF, User from core.task_support import cache_based_lock from logs.models import ImportBatch from nigiri.client import ( @@ -45,7 +46,6 @@ from organizations.models import Organization from publications.models import Platform - logger = logging.getLogger(__name__) COUNTER_VERSIONS = ( @@ -111,6 +111,7 @@ class SushiCredentials(models.Model): (UL_CONS_STAFF, 'Consortium staff'), (UL_CONS_ADMIN, 'Superuser'), ) + blake_hash_size = 16 organization = models.ForeignKey(Organization, on_delete=models.CASCADE) platform = models.ForeignKey(Platform, on_delete=models.CASCADE) @@ -138,6 +139,9 @@ class SushiCredentials(models.Model): default=UL_ORG_ADMIN, help_text='Only user with the same or higher level can unlock it and/or edit it', ) + version_hash = models.CharField( + max_length=blake_hash_size * 2, help_text='Current hash of model attributes' + ) class Meta: unique_together = (('organization', 'platform', 'counter_version'),) @@ -146,6 +150,13 @@ class Meta: def __str__(self): return f'{self.organization} - {self.platform}, {self.get_counter_version_display()}' + def save(self, *args, **kwargs): + """ + We override the parent save method to make sure `version_hash` is recomputed on each save + """ + self.version_hash = self.compute_version_hash() + super().save(*args, **kwargs) + def change_lock(self, user: User, level: int): """ Set the lock_level on this object @@ -218,6 +229,45 @@ def when_can_access(self, base_wait_unit=5) -> float: return diff return 0 + def version_dict(self) -> Dict: + """ + Returns a dictionary with all the attributes of this object that may be subject to + change between versions and which influence success when querying the remote + server. 
+ It is used to store credentials version information with SushiFetchAttempts and as the + source for the `credentials_version_hash` hash. + :return: + """ + keys = { + 'url', + 'counter_version', + 'requestor_id', + 'customer_id', + 'http_username', + 'http_password', + 'api_key', + 'extra_params', + } + return {key: getattr(self, key) for key in keys} + + @classmethod + def hash_version_dict(cls, data): + """ + Return a hash of a dictionary. Takes care of possible differences in the ordering of keys + :param data: + :return: + """ + dump = json.dumps(data, ensure_ascii=False, sort_keys=True) + return blake2b(dump.encode('utf-8'), digest_size=cls.blake_hash_size).hexdigest() + + def compute_version_hash(self): + """ + A hash of the mutable attributes of the current credentials - may be used to detect + changes in the credentials. + :return: + """ + return self.hash_version_dict(self.version_dict()) + def fetch_report( self, counter_report: CounterReportType, @@ -244,12 +294,20 @@ else: attempt_params = fetch_m(client, counter_report, start_date, end_date) attempt_params['in_progress'] = False + # add version info to the attempt + attempt_params['credentials_version_hash'] = self.compute_version_hash() + # now store it - into an existing object or a new one if fetch_attempt: for key, value in attempt_params.items(): setattr(fetch_attempt, key, value) + fetch_attempt.processing_info['credentials_version'] = self.version_dict() fetch_attempt.save() return fetch_attempt else: + if 'processing_info' in attempt_params: + attempt_params['processing_info']['credentials_version'] = self.version_dict() + else: + attempt_params['processing_info'] = {'credentials_version': self.version_dict()} attempt = SushiFetchAttempt.objects.create(**attempt_params) return attempt @@ -396,15 +454,35 @@ def _fetch_report_v5(self, client, counter_report, start_date, end_date) -> dict def where_to_store(instance: 'SushiFetchAttempt', filename): root, ext = os.path.splitext(filename) ts = now().strftime('%Y%m%d-%H%M%S.%f') + organization = instance.credentials.organization return ( - f'counter/{instance.credentials.organization.internal_id}/' + f'counter/{organization.internal_id or organization.pk}/' f'{instance.credentials.platform.short_name}/' f'{instance.credentials.counter_version}_{instance.counter_report.code}_{ts}{ext}' ) +class SushiFetchAttemptQuerySet(models.QuerySet): + def current(self): + return self.filter(credentials_version_hash=F('credentials__version_hash')) + + def successful(self, success_measure='is_processed'): + assert success_measure in ( 'is_processed', 'download_success', 'processing_success', 'contains_data', ) + return self.filter(**{success_measure: True}) + + def current_or_successful(self, success_measure='is_processed'): + return self.current() | self.successful(success_measure=success_measure) + + class SushiFetchAttempt(models.Model): + objects = SushiFetchAttemptQuerySet.as_manager() + credentials = models.ForeignKey(SushiCredentials, on_delete=models.CASCADE) counter_report = models.ForeignKey(CounterReportType, on_delete=models.CASCADE) timestamp = models.DateTimeField(auto_now_add=True) @@ -448,11 +526,20 @@ is_processed = models.BooleanField(default=False, help_text='Was the data converted into logs?') when_processed = models.DateTimeField(null=True, blank=True) import_batch = models.OneToOneField(ImportBatch, null=True, on_delete=models.SET_NULL) + credentials_version_hash = models.CharField( + max_length=2 * 
SushiCredentials.blake_hash_size, + help_text='Hash computed from the credentials at the time this attempt was made', + ) processing_info = JSONField(default=dict, help_text='Internal info') def __str__(self): return f'{self.status}: {self.credentials}, {self.counter_report}' + def save(self, *args, **kwargs): + if not self.credentials_version_hash and self.credentials: + self.credentials_version_hash = self.credentials.version_hash + super().save(*args, **kwargs) + @property def status(self): status = 'SUCCESS' diff --git a/apps/sushi/tasks.py b/apps/sushi/tasks.py index 924dfee7..39c256a4 100644 --- a/apps/sushi/tasks.py +++ b/apps/sushi/tasks.py @@ -1,12 +1,19 @@ """ Celery tasks related to SUSHI fetching """ +import logging +from collections import Counter +from datetime import datetime + import celery +from core.logic.dates import month_end from core.logic.error_reporting import email_if_fails from core.task_support import cache_based_lock -from .logic.fetching import retry_queued, fetch_new_sushi_data -from sushi.models import SushiFetchAttempt, SushiCredentials +from sushi.models import SushiFetchAttempt, SushiCredentials, CounterReportType +from .logic.fetching import retry_queued, fetch_new_sushi_data, find_holes_in_data + +logger = logging.getLogger(__name__) @celery.shared_task @@ -51,3 +58,53 @@ def fetch_new_sushi_data_for_credentials_task(credentials_id: int): credentials = SushiCredentials.objects.get(pk=credentials_id) with cache_based_lock(f'fetch_new_sushi_data_task_{credentials_id}', blocking_timeout=10): fetch_new_sushi_data(credentials=credentials) + + +@celery.shared_task +@email_if_fails +def make_fetch_attempt_task( + credentials_id: int, counter_report_id: int, start_date: datetime, end_date: datetime +): + """ + The input data are enough to specify one SushiFetchAttempt. Create it and download the + data + """ + credentials = SushiCredentials.objects.get(pk=credentials_id) + counter_report = CounterReportType.objects.get(pk=counter_report_id) + credentials.fetch_report(counter_report, start_date, end_date, use_url_lock=True) + + +@celery.shared_task +@email_if_fails +def retry_holes_with_new_credentials_task(): + """ + Finds holes in data using `find_holes_in_data` and runs redownload tasks for them. 
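+ Unlike `retry_holes_with_new_credentials`, each download runs as a separate `make_fetch_attempt_task`.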
+ """ + holes = find_holes_in_data() + logger.debug('Found %d holes to retry', len(holes)) + stats = Counter() + for i, hole in enumerate(holes): + cred_based_delay = hole.credentials.when_can_access() + if not hole.attempt_with_current_credentials: + # this is what we want to process - cases when sushi credentials were updated + if cred_based_delay == 0: + # we are ready to retry + logger.debug('Trying to fill hole: %s / %s', hole.credentials, hole.date) + # we use isoformat below to make sure the date is properly serialized + # when passed on by celery + make_fetch_attempt_task.apply_async( + [], + dict( + credentials_id=hole.credentials.pk, + counter_report_id=hole.counter_report.pk, + start_date=hole.date.isoformat(), + end_date=month_end(hole.date).isoformat(), + ), + priority=9, + ) + stats['started'] += 1 + else: + logger.debug('Too soon to retry - need %d s', cred_based_delay) + stats['too soon'] += 1 + logger.debug('Hole filling stats: %s', stats) diff --git a/apps/sushi/tests/conftest.py b/apps/sushi/tests/conftest.py index bdf5b696..c06bfb18 100644 --- a/apps/sushi/tests/conftest.py +++ b/apps/sushi/tests/conftest.py @@ -1,15 +1,16 @@ import pytest +from core.models import UL_ORG_ADMIN from logs.tests.conftest import report_type_nd -from sushi.models import CounterReportType +from sushi.models import CounterReportType, SushiCredentials @pytest.fixture() def counter_report_type_named(report_type_nd): - def fn(name): + def fn(name, version=5): rt = report_type_nd(0, short_name=name + 'rt') return CounterReportType.objects.create( - code=name, counter_version=5, name=name + ' title', report_type=rt + code=name, counter_version=version, name=name + ' title', report_type=rt ) yield fn @@ -21,3 +22,15 @@ def counter_report_type(report_type_nd): yield CounterReportType.objects.create( code='TR', counter_version=5, name='Title report', report_type=report_type ) + + +@pytest.fixture() +def credentials(organizations, platforms): + credentials = SushiCredentials.objects.create( + organization=organizations[0], + platform=platforms[0], + counter_version=5, + lock_level=UL_ORG_ADMIN, + url='http://a.b.c/', + ) + yield credentials diff --git a/apps/sushi/tests/test_logic_fetching.py b/apps/sushi/tests/test_logic_fetching.py new file mode 100644 index 00000000..e0d1f550 --- /dev/null +++ b/apps/sushi/tests/test_logic_fetching.py @@ -0,0 +1,185 @@ +from datetime import timedelta + +import pytest +from django.utils.timezone import now + +from core.logic.dates import month_start, month_end +from nigiri.client import Sushi5Client +from nigiri.counter5 import Counter5ReportBase +from publications.models import Platform +from ..logic.data_import import import_sushi_credentials +from ..logic.fetching import months_to_cover, find_holes_in_data, retry_holes_with_new_credentials +from sushi.models import CounterReportType, SushiCredentials, SushiFetchAttempt +from organizations.tests.conftest import organizations +from publications.tests.conftest import platforms +from logs.tests.conftest import report_type_nd +from ..tasks import retry_holes_with_new_credentials_task + + +@pytest.fixture(scope='session') +def celery_config(): + return { + 'task_always_eager': True, + } + + +class TestHelperFunctions(object): + def test_months_to_cover_no_first_month(self, settings): + today = now().date() + # set the date to 4 months before today + first_month = month_start(month_start(today) - timedelta(days=100)) + settings.SUSHI_ATTEMPT_LAST_DATE = first_month.isoformat()[:7] + months = months_to_cover() + assert 
len(months) == 4 + assert months[0] == first_month + assert months[-1] == month_start(month_start(today) - timedelta(days=15)) + + +@pytest.mark.django_db +class TestHoleFillingMachinery(object): + def test_find_holes_in_data(self, settings, organizations, report_type_nd): + """ + Tests the `find_holes_in_data` function. + """ + # set the date to 3 months before today + first_month = month_start(month_start(now().date()) - timedelta(days=80)) + settings.SUSHI_ATTEMPT_LAST_DATE = first_month.isoformat()[:7] + # create all the prerequisites + data = [ { 'platform': 'XXX', 'organization': organizations[1].internal_id, 'customer_id': 'BBB', 'requestor_id': 'RRRX', 'URL': 'http://this.is/test/2', 'version': 5, 'extra_attrs': 'auth=un,pass;api_key=kekekeyyy;foo=bar', }, ] + Platform.objects.create(short_name='XXX', name='XXXX', ext_id=10) + import_sushi_credentials(data) + assert SushiCredentials.objects.count() == 1 + cr1 = SushiCredentials.objects.get() + cr1.create_sushi_client() + report = CounterReportType.objects.create( code='tr', name='tr', counter_version=5, report_type=report_type_nd(0) ) + cr1.active_counter_reports.add(report) + + def mock_get_report_data(*args, **kwargs): + return Counter5ReportBase() + + Sushi5Client.get_report_data = mock_get_report_data + # test that find_holes_in_data returns the right number of records + holes = find_holes_in_data() + assert len(holes) == 3 + # add an attempt and try again + attempt = cr1.fetch_report(report, start_date=first_month, end_date=month_end(first_month)) + assert attempt.processing_success + holes = find_holes_in_data() + assert len(holes) == 2 + assert holes[0].attempt_count == 0 + # add a failed attempt for the same month + attempt = cr1.fetch_report(report, start_date=first_month, end_date=month_end(first_month)) + attempt.processing_success = False + attempt.save() + holes = find_holes_in_data() + assert len(holes) == 2, 'nothing should change' + assert holes[0].attempt_count == 0 + # add a failed attempt for the next month + next_month = month_start(first_month + timedelta(days=45)) + attempt = cr1.fetch_report(report, start_date=next_month, end_date=month_end(next_month)) + attempt.processing_success = False + attempt.save() + holes = find_holes_in_data() + assert len(holes) == 2, 'nothing should change' + assert holes[0].attempt_count == 1 + + def test_retry_holes_with_new_credentials(self, settings, organizations, report_type_nd): + """ + Tests the `retry_holes_with_new_credentials` function. 
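+ With the mocked `Sushi5Client.get_report_data` every download succeeds, so all three holes get filled.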
+ """ + # set the date to 3 months before today + first_month = month_start(month_start(now().date()) - timedelta(days=80)) + settings.SUSHI_ATTEMPT_LAST_DATE = first_month.isoformat()[:7] + # create all the prerequisites + data = [ + { + 'platform': 'XXX', + 'organization': organizations[1].internal_id, + 'customer_id': 'BBB', + 'requestor_id': 'RRRX', + 'URL': 'http://this.is/test/2', + 'version': 5, + 'extra_attrs': 'auth=un,pass;api_key=kekekeyyy;foo=bar', + }, + ] + Platform.objects.create(short_name='XXX', name='XXXX', ext_id=10) + import_sushi_credentials(data) + assert SushiCredentials.objects.count() == 1 + cr1 = SushiCredentials.objects.get() + cr1.create_sushi_client() + report = CounterReportType.objects.create( + code='tr', name='tr', counter_version=5, report_type=report_type_nd(0) + ) + cr1.active_counter_reports.add(report) + + def mock_get_report_data(*args, **kwargs): + return Counter5ReportBase() + + Sushi5Client.get_report_data = mock_get_report_data + # test that find_holes_in_data returns the right number of records + holes = find_holes_in_data() + assert len(holes) == 3 + # add an attempt and try again + assert SushiFetchAttempt.objects.count() == 0 + retry_holes_with_new_credentials() + assert SushiFetchAttempt.objects.count() == 3 + holes = find_holes_in_data() + assert len(holes) == 0 + + @pytest.mark.now + def test_retry_holes_with_new_credentials_task( + self, settings, organizations, report_type_nd, celery_session_worker + ): + """ + Tests the `find_holes_in_data` function. + """ + # set the date to 3 months before today + first_month = month_start(month_start(now().date()) - timedelta(days=80)) + settings.SUSHI_ATTEMPT_LAST_DATE = first_month.isoformat()[:7] + # create all the prerequisites + data = [ + { + 'platform': 'XXX', + 'organization': organizations[1].internal_id, + 'customer_id': 'BBB', + 'requestor_id': 'RRRX', + 'URL': 'http://this.is/test/2', + 'version': 5, + 'extra_attrs': 'auth=un,pass;api_key=kekekeyyy;foo=bar', + }, + ] + Platform.objects.create(short_name='XXX', name='XXXX', ext_id=10) + import_sushi_credentials(data) + assert SushiCredentials.objects.count() == 1 + cr1 = SushiCredentials.objects.get() + cr1.create_sushi_client() + report = CounterReportType.objects.create( + code='tr', name='tr', counter_version=5, report_type=report_type_nd(0) + ) + cr1.active_counter_reports.add(report) + + def mock_get_report_data(*args, **kwargs): + return Counter5ReportBase() + + Sushi5Client.get_report_data = mock_get_report_data + # test that find_holes_in_data returns the right number of records + holes = find_holes_in_data() + assert len(holes) == 3 + # add an attempt and try again + assert SushiFetchAttempt.objects.count() == 0 + retry_holes_with_new_credentials_task() + assert SushiFetchAttempt.objects.count() == 3 + holes = find_holes_in_data() + assert len(holes) == 0 diff --git a/apps/sushi/tests/test_sushfetchattempt_model.py b/apps/sushi/tests/test_sushfetchattempt_model.py new file mode 100644 index 00000000..f3cf4ff8 --- /dev/null +++ b/apps/sushi/tests/test_sushfetchattempt_model.py @@ -0,0 +1,82 @@ +import pytest + +from django.core.files.base import ContentFile + +from core.models import UL_ORG_ADMIN +from organizations.models import Organization +from publications.models import Platform +from sushi.models import SushiFetchAttempt, SushiCredentials + + +@pytest.mark.django_db +class TestFileName: + """ Test class for checking whether setting the file name + work as expected + """ + + @pytest.mark.parametrize( + ('internal_id', 
'platform_name', 'version', 'code', 'ext'), ( ('internal1', 'platform_1', 5, 'TR', 'json'), (None, 'platform_2', 5, 'TR', 'json'), (None, 'platform_1', 4, 'JR1', 'tsv'), ('internal2', 'platform_1', 4, 'JR1', 'tsv'), ), ) + def test_file_name( self, counter_report_type_named, internal_id, platform_name, version, code, ext, ): + counter_report_type = counter_report_type_named(code, version) + platform = Platform.objects.create(short_name=platform_name, name=platform_name, ext_id=10) + + organization = Organization.objects.create( + # ext_id=1, + # parent=None, + internal_id=internal_id, + # ico='123', + # name_cs='AAA', + # name_en='AAA', + # short_name='AA', + ) + + credentials = SushiCredentials.objects.create( + organization=organization, + platform=platform, + counter_version=version, + lock_level=UL_ORG_ADMIN, + url='http://a.b.c/', + ) + + data_file = ContentFile("b") + data_file.name = f"report.{ext}" + + fetch_attempt = SushiFetchAttempt.objects.create( + credentials=credentials, + counter_report=counter_report_type, + start_date="2020-01-01", + end_date="2020-02-01", + data_file=data_file, + credentials_version_hash=credentials.compute_version_hash(), + ) + + assert fetch_attempt.data_file.name.startswith( + f"counter/{internal_id or organization.pk}/{ platform_name }/{ version }_{code}" + ) + + +@pytest.mark.django_db +class TestSushiFetchAttemptModelManager(object): + def test_custom_manager_methods_exist(self): + """ + Test that custom manager methods exist at all + """ + SushiFetchAttempt.objects.all() + SushiFetchAttempt.objects.current() + SushiFetchAttempt.objects.current_or_successful() + + def test_custom_manager_methods_exist_on_queryset(self): + """ + Test that custom manager methods are also available on querysets for SushiFetchAttempts + """ + SushiFetchAttempt.objects.filter(download_success=True).current() + SushiFetchAttempt.objects.filter(download_success=True).current_or_successful() diff --git a/apps/sushi/tests/test_sushicredentials_model.py b/apps/sushi/tests/test_sushicredentials_model.py index 171ff30b..0b0f4673 100644 --- a/apps/sushi/tests/test_sushicredentials_model.py +++ b/apps/sushi/tests/test_sushicredentials_model.py @@ -2,9 +2,11 @@ from rest_framework.exceptions import PermissionDenied from core.models import UL_CONS_ADMIN, UL_ORG_ADMIN, UL_CONS_STAFF, Identity +from nigiri.client import Sushi5Client +from nigiri.counter5 import Counter5ReportBase from organizations.models import UserOrganization from sushi.logic.data_import import import_sushi_credentials -from ..models import SushiCredentials +from ..models import SushiCredentials, CounterReportType, SushiFetchAttempt from publications.models import Platform from organizations.tests.conftest import organizations from publications.tests.conftest import platforms @@ -102,3 +104,125 @@ def _test_change_lock(cls, credentials, user, level, can): else: with pytest.raises(PermissionDenied): credentials.change_lock(user, level) + + +@pytest.mark.django_db +class TestCredentialsVersioning(object): + def test_version_hash_is_stored(self, organizations): + """ + Tests that version_hash is computed and stored on save + """ + data = [ { 'platform': 'XXX', 'organization': organizations[1].internal_id, 'customer_id': 'BBB', 'requestor_id': 'RRRX', 'URL': 'http://this.is/test/2', 'version': 5, 'extra_attrs': 'auth=un,pass;api_key=kekekeyyy;foo=bar', }, ] + Platform.objects.create(short_name='XXX', name='XXXX', ext_id=10) + import_sushi_credentials(data) + assert 
SushiCredentials.objects.count() == 1 + cr1 = SushiCredentials.objects.get() + assert cr1.version_hash != '' + assert cr1.version_hash == cr1.compute_version_hash() + old_hash = cr1.version_hash + cr1.api_key = 'new_api_key' + assert cr1.compute_version_hash() != cr1.version_hash, 'no change without a save' + cr1.save() + assert cr1.compute_version_hash() == cr1.version_hash + assert cr1.version_hash != old_hash + + def test_version_hash_changes(self, organizations): + """ + Tests that computation of version_hash from `SushiCredentials` can really distinguish + between different versions of the same object + """ + data = [ + { + 'platform': 'XXX', + 'organization': organizations[1].internal_id, + 'customer_id': 'BBB', + 'requestor_id': 'RRRX', + 'URL': 'http://this.is/test/2', + 'version': 5, + 'extra_attrs': 'auth=un,pass;api_key=kekekeyyy;foo=bar', + }, + ] + Platform.objects.create(short_name='XXX', name='XXXX', ext_id=10) + import_sushi_credentials(data) + assert SushiCredentials.objects.count() == 1 + cr1 = SushiCredentials.objects.get() + hash1 = cr1.compute_version_hash() + cr1.requestor_id = 'new_id' + hash2 = cr1.compute_version_hash() + assert hash2 != hash1 + cr1.api_key = 'new_api_key' + assert cr1.compute_version_hash() != hash1 + assert cr1.compute_version_hash() != hash2 + + def test_version_hash_does_not_change(self, organizations): + """ + Tests that the value of version_hash from `SushiCredentials` does not change when some + unrelated changes are made + """ + data = [ + { + 'platform': 'XXX', + 'organization': organizations[1].internal_id, + 'customer_id': 'BBB', + 'requestor_id': 'RRRX', + 'URL': 'http://this.is/test/2', + 'version': 5, + 'extra_attrs': 'auth=un,pass;api_key=kekekeyyy;foo=bar', + }, + ] + Platform.objects.create(short_name='XXX', name='XXXX', ext_id=10) + import_sushi_credentials(data) + assert SushiCredentials.objects.count() == 1 + cr1 = SushiCredentials.objects.get() + hash1 = cr1.compute_version_hash() + cr1.last_updated_by = None + cr1.outside_consortium = True + cr1.save() + assert cr1.compute_version_hash() == hash1 + + def test_version_info_is_stored_in_fetch_attempt(self, organizations, report_type_nd): + """ + Tests that when we fetch data using `SushiCredentials`, the `SushiFetchAttempt` that is + created contains information about the credentials version - both in `processing_info` + and in `credentials_version_hash` + """ + data = [ + { + 'platform': 'XXX', + 'organization': organizations[1].internal_id, + 'customer_id': 'BBB', + 'requestor_id': 'RRRX', + 'URL': 'http://this.is/test/2', + 'version': 5, + 'extra_attrs': 'auth=un,pass;api_key=kekekeyyy;foo=bar', + }, + ] + Platform.objects.create(short_name='XXX', name='XXXX', ext_id=10) + import_sushi_credentials(data) + assert SushiCredentials.objects.count() == 1 + cr1 = SushiCredentials.objects.get() + cr1.create_sushi_client() + report = CounterReportType.objects.create( code='tr', name='tr', counter_version=5, report_type=report_type_nd(0) ) + + def mock_get_report_data(*args, **kwargs): + return Counter5ReportBase() + + Sushi5Client.get_report_data = mock_get_report_data + attempt: SushiFetchAttempt = cr1.fetch_report( report, start_date='2020-01-01', end_date='2020-01-31' ) + assert 'credentials_version' in attempt.processing_info + assert attempt.credentials_version_hash != '' + assert attempt.credentials_version_hash == cr1.compute_version_hash() diff --git a/apps/sushi/tests/test_views.py b/apps/sushi/tests/test_views.py index 170ca709..d37c096b 100644 --- 
a/apps/sushi/tests/test_views.py +++ b/apps/sushi/tests/test_views.py @@ -1,12 +1,22 @@ +from unittest.mock import patch + import pytest from django.urls import reverse from core.models import UL_ORG_ADMIN, UL_CONS_ADMIN, UL_CONS_STAFF, Identity from organizations.models import UserOrganization -from sushi.models import SushiCredentials -from organizations.tests.conftest import organizations +from sushi.models import SushiCredentials, SushiFetchAttempt +from organizations.tests.conftest import organizations, identity_by_user_type from publications.tests.conftest import platforms -from core.tests.conftest import master_client, master_identity, valid_identity, authenticated_client +from core.tests.conftest import ( + master_client, + master_identity, + valid_identity, + authenticated_client, + authentication_headers, + admin_identity, + invalid_identity, +) @pytest.mark.django_db() @@ -213,3 +223,280 @@ def test_destroy_locked_lower( resp = authenticated_client.delete(url) assert resp.status_code == 204 assert SushiCredentials.objects.count() == 0 + + +@pytest.mark.django_db() +class TestSushiFetchAttemptStatsView(object): + def test_no_dates_mode_all( self, organizations, platforms, counter_report_type_named, master_client ): + """ + Test that the api view works when the requested data does not contain dates and all + attempts are requested + """ + credentials = SushiCredentials.objects.create( + organization=organizations[0], + platform=platforms[0], + counter_version=5, + lock_level=UL_ORG_ADMIN, + url='http://a.b.c/', + ) + new_rt1 = counter_report_type_named('new1') + SushiFetchAttempt.objects.create( + credentials=credentials, + start_date='2020-01-01', + end_date='2020-01-31', + credentials_version_hash=credentials.version_hash, + counter_report=new_rt1, + ) + assert SushiFetchAttempt.objects.count() == 1 + url = reverse('sushi-fetch-attempt-stats') + resp = master_client.get(url + '?mode=all') + assert resp.status_code == 200 + data = resp.json() + assert len(data) == 1 + assert data[0]['failure_count'] == 1 + + def test_no_dates_mode_current( self, organizations, platforms, counter_report_type_named, master_client ): + """ + Test that the api view works when the requested data does not contain dates and only + attempts for the current version of the credentials are requested + """ + credentials = SushiCredentials.objects.create( + organization=organizations[0], + platform=platforms[0], + counter_version=5, + lock_level=UL_ORG_ADMIN, + url='http://a.b.c/', + ) + new_rt1 = counter_report_type_named('new1') + SushiFetchAttempt.objects.create( + credentials=credentials, + start_date='2020-01-01', + end_date='2020-01-31', + credentials_version_hash=credentials.version_hash, + counter_report=new_rt1, + ) + assert SushiFetchAttempt.objects.count() == 1 + # now update the credentials so that the attempt is no longer related to the current + # version + credentials.customer_id = 'new_id' + credentials.save() + # let's try it - there should be nothing + url = reverse('sushi-fetch-attempt-stats') + resp = master_client.get(url + '?mode=current') + assert resp.status_code == 200 + data = resp.json() + assert len(data) == 0 + + def test_no_dates_mode_current_2( self, organizations, platforms, counter_report_type_named, master_client ): + """ + Test that the api view works when the requested data does not contain dates and only + attempts for the current version of the credentials are requested + """ + credentials = SushiCredentials.objects.create( + organization=organizations[0], + platform=platforms[0], + counter_version=5, + lock_level=UL_ORG_ADMIN, + 
url='http://a.b.c/', + ) + new_rt1 = counter_report_type_named('new1') + SushiFetchAttempt.objects.create( + credentials=credentials, + start_date='2020-01-01', + end_date='2020-01-31', + credentials_version_hash=credentials.version_hash, + counter_report=new_rt1, + ) + assert SushiFetchAttempt.objects.count() == 1 + # now update the credentials so that the attempt is no longer related to the current + # version + credentials.customer_id = 'new_id' + credentials.save() + # create a second attempt, this one with current version + SushiFetchAttempt.objects.create( + credentials=credentials, + start_date='2020-01-01', + end_date='2020-01-31', + credentials_version_hash=credentials.version_hash, + counter_report=new_rt1, + ) + assert SushiFetchAttempt.objects.count() == 2 + # let's try it - only the attempt with the current credentials version should be counted + url = reverse('sushi-fetch-attempt-stats') + resp = master_client.get(url + '?mode=current') + assert resp.status_code == 200 + data = resp.json() + assert len(data) == 1 + assert data[0]['failure_count'] == 1 + # let's check that with mode=all there would be two + resp = master_client.get(url + '?mode=all') + assert resp.status_code == 200 + data = resp.json() + assert len(data) == 1 + assert data[0]['failure_count'] == 2 + + def test_no_dates_mode_success_and_current( self, organizations, platforms, counter_report_type_named, master_client ): + """ + Test that the api view works when the requested data does not contain dates and all + successful attempts plus those for the current credentials version are requested + """ + credentials = SushiCredentials.objects.create( + organization=organizations[0], + platform=platforms[0], + counter_version=5, + lock_level=UL_ORG_ADMIN, + url='http://a.b.c/', + ) + new_rt1 = counter_report_type_named('new1') + # one success + SushiFetchAttempt.objects.create( + credentials=credentials, + start_date='2020-01-01', + end_date='2020-01-31', + credentials_version_hash=credentials.version_hash, + counter_report=new_rt1, + contains_data=True, + ) + # one failure + SushiFetchAttempt.objects.create( + credentials=credentials, + start_date='2020-01-01', + end_date='2020-01-31', + credentials_version_hash=credentials.version_hash, + counter_report=new_rt1, + contains_data=False, + ) + assert SushiFetchAttempt.objects.count() == 2 + # now update the credentials so that the attempt is no longer related to the current + # version + credentials.customer_id = 'new_id' + credentials.save() + # create a second attempt, this one with current version + # one new failure + SushiFetchAttempt.objects.create( + credentials=credentials, + start_date='2020-01-01', + end_date='2020-01-31', + credentials_version_hash=credentials.version_hash, + counter_report=new_rt1, + contains_data=False, + ) + assert SushiFetchAttempt.objects.count() == 3 + # let's try it - the old success and the current failure should be counted + url = reverse('sushi-fetch-attempt-stats') + resp = master_client.get(url + '?mode=success_and_current&success_metric=contains_data') + assert resp.status_code == 200 + data = resp.json() + assert len(data) == 1 + assert data[0]['success_count'] == 1 + assert data[0]['failure_count'] == 1 + # let's check that with mode=current there would be only one + resp = master_client.get(url + '?mode=current&success_metric=contains_data') + assert resp.status_code == 200 + data = resp.json() + assert len(data) == 1 + assert data[0]['success_count'] == 0 + assert data[0]['failure_count'] == 1 + # let's check that with mode=all there would be three + resp = master_client.get(url + '?mode=all&success_metric=contains_data') + assert resp.status_code == 200 + 
data = resp.json() + assert len(data) == 1 + assert data[0]['success_count'] == 1 + assert data[0]['failure_count'] == 2 + + +@pytest.mark.now +@pytest.mark.django_db() +class TestSushiFetchAttemptView(object): + def test_create(self, master_client, credentials, counter_report_type): + # we must patch the run_sushi_fetch_attempt_task task in order to prevent the tests + # from stalling in CI + with patch('sushi.views.run_sushi_fetch_attempt_task') as task_mock: + resp = master_client.post( + reverse('sushi-fetch-attempt-list'), + { + 'credentials': credentials.pk, + 'start_date': '2020-01-01', + 'end_date': '2020-01-31', + 'counter_report': counter_report_type.pk, + }, + ) + assert task_mock.apply_async.call_count == 1 + assert resp.status_code == 201 + assert 'pk' in resp.json() + + @pytest.mark.parametrize( + ['user_type', 'can_create', 'return_code'], + [ + ['no_user', False, 403], + ['invalid', False, 403], + ['unrelated', False, 403], + ['related_user', False, 403], + ['related_admin', True, 201], + ['master_user', True, 201], + ['superuser', True, 201], + ], + ) + def test_create_api_access( + self, + user_type, + can_create, + return_code, + client, + authentication_headers, + credentials, + counter_report_type, + identity_by_user_type, + ): + identity, org = identity_by_user_type(user_type) + + with patch('sushi.views.run_sushi_fetch_attempt_task') as task_mock: + resp = client.post( + reverse('sushi-fetch-attempt-list'), + { + 'credentials': credentials.pk, + 'start_date': '2020-01-01', + 'end_date': '2020-01-31', + 'counter_report': counter_report_type.pk, + }, + **authentication_headers(identity), + ) + assert resp.status_code == return_code + if can_create: + assert task_mock.apply_async.call_count == 1 + else: + assert task_mock.apply_async.call_count == 0 + + def test_detail_available_after_create(self, master_client, credentials, counter_report_type): + """ + Check that if we create an attempt, it will be available using the same API later. 
+ This test was created because after introducing default filtering of attempts + to successful+current, this was not true + """ + with patch('sushi.views.run_sushi_fetch_attempt_task') as task_mock: + resp = master_client.post( + reverse('sushi-fetch-attempt-list'), + { + 'credentials': credentials.pk, + 'start_date': '2020-01-01', + 'end_date': '2020-01-31', + 'counter_report': counter_report_type.pk, + }, + ) + assert task_mock.apply_async.call_count == 1 + assert resp.status_code == 201 + create_data = resp.json() + pk = create_data['pk'] + # now get the details of the attempt using GET + resp = master_client.get(reverse('sushi-fetch-attempt-detail', args=(pk,))) + assert resp.status_code == 200 + assert resp.json() == create_data diff --git a/apps/sushi/urls.py b/apps/sushi/urls.py index 015fa15a..f0228a4b 100644 --- a/apps/sushi/urls.py +++ b/apps/sushi/urls.py @@ -6,10 +6,16 @@ router = SimpleRouter() router.register(r'sushi-credentials', views.SushiCredentialsViewSet, basename='sushi-credentials') router.register(r'counter-report-type', views.CounterReportTypeViewSet) -router.register(r'sushi-fetch-attempt', views.SushiFetchAttemptViewSet) +router.register( + r'sushi-fetch-attempt', views.SushiFetchAttemptViewSet, basename='sushi-fetch-attempt' +) urlpatterns = [ - path('sushi-fetch-attempt-stats/', views.SushiFetchAttemptStatsView.as_view()), + path( + 'sushi-fetch-attempt-stats/', + views.SushiFetchAttemptStatsView.as_view(), + name='sushi-fetch-attempt-stats', + ), path('run-task/fetch-new-sushi-data', views.StartFetchNewSushiDataTask.as_view()), path( 'run-task/fetch-new-sushi-data/', diff --git a/apps/sushi/views.py b/apps/sushi/views.py index bcd1e95d..3b2be55e 100644 --- a/apps/sushi/views.py +++ b/apps/sushi/views.py @@ -3,7 +3,7 @@ import dateparser import reversion -from django.db.models import Count, Q, Max, Min +from django.db.models import Count, Q, Max, Min, F from django.http import HttpResponseBadRequest from django.shortcuts import get_object_or_404 from django.utils.decorators import method_decorator @@ -15,7 +15,7 @@ from reversion.views import create_revision from core.logic.dates import month_start, month_end -from core.models import UL_CONS_STAFF +from core.models import UL_CONS_STAFF, REL_ORG_ADMIN from core.permissions import SuperuserOrAdminPermission, OrganizationRelatedPermissionMixin from organizations.logic.queries import organization_filter_from_org_id from sushi.models import CounterReportType, SushiFetchAttempt @@ -111,6 +111,9 @@ class SushiFetchAttemptViewSet(ModelViewSet): queryset = SushiFetchAttempt.objects.none() http_method_names = ['get', 'post', 'options', 'head'] + def get_object(self): + return super().get_object() + def get_queryset(self): organizations = self.request.user.accessible_organizations() filter_params = {} @@ -135,11 +138,26 @@ def get_queryset(self): if 'counter_version' in self.request.query_params: counter_version = self.request.query_params['counter_version'] filter_params['credentials__counter_version'] = counter_version - return SushiFetchAttempt.objects.filter(**filter_params).select_related( + mode = self.request.query_params.get('mode') + if mode == 'success_and_current': + qs = SushiFetchAttempt.objects.current_or_successful() + elif mode == 'current': + qs = SushiFetchAttempt.objects.current() + else: + qs = SushiFetchAttempt.objects.all() + return qs.filter(**filter_params).select_related( 'counter_report', 'credentials__organization' ) def perform_create(self, serializer: SushiFetchAttemptSerializer): + # check 
diff --git a/apps/sushi/urls.py b/apps/sushi/urls.py
index 015fa15a..f0228a4b 100644
--- a/apps/sushi/urls.py
+++ b/apps/sushi/urls.py
@@ -6,10 +6,16 @@
 router = SimpleRouter()
 router.register(r'sushi-credentials', views.SushiCredentialsViewSet, basename='sushi-credentials')
 router.register(r'counter-report-type', views.CounterReportTypeViewSet)
-router.register(r'sushi-fetch-attempt', views.SushiFetchAttemptViewSet)
+router.register(
+    r'sushi-fetch-attempt', views.SushiFetchAttemptViewSet, basename='sushi-fetch-attempt'
+)

 urlpatterns = [
-    path('sushi-fetch-attempt-stats/', views.SushiFetchAttemptStatsView.as_view()),
+    path(
+        'sushi-fetch-attempt-stats/',
+        views.SushiFetchAttemptStatsView.as_view(),
+        name='sushi-fetch-attempt-stats',
+    ),
     path('run-task/fetch-new-sushi-data', views.StartFetchNewSushiDataTask.as_view()),
     path(
         'run-task/fetch-new-sushi-data/',
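For reference, the explicit basename and route name added above are what make the reverse() calls in the tests resolve. A small sketch; the URL values in the comments assume sushi.urls is included without any project-level prefix:

from django.urls import reverse

# names generated by the router from the basename 'sushi-fetch-attempt'
reverse('sushi-fetch-attempt-list')               # e.g. '/sushi-fetch-attempt/'
reverse('sushi-fetch-attempt-detail', args=(1,))  # e.g. '/sushi-fetch-attempt/1/'
# the explicitly named plain path
reverse('sushi-fetch-attempt-stats')              # e.g. '/sushi-fetch-attempt-stats/'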
diff --git a/apps/sushi/views.py b/apps/sushi/views.py
index bcd1e95d..3b2be55e 100644
--- a/apps/sushi/views.py
+++ b/apps/sushi/views.py
@@ -3,7 +3,7 @@
 import dateparser
 import reversion
-from django.db.models import Count, Q, Max, Min
+from django.db.models import Count, Q, Max, Min, F
 from django.http import HttpResponseBadRequest
 from django.shortcuts import get_object_or_404
 from django.utils.decorators import method_decorator
@@ -15,7 +15,7 @@
 from reversion.views import create_revision

 from core.logic.dates import month_start, month_end
-from core.models import UL_CONS_STAFF
+from core.models import UL_CONS_STAFF, REL_ORG_ADMIN
 from core.permissions import SuperuserOrAdminPermission, OrganizationRelatedPermissionMixin
 from organizations.logic.queries import organization_filter_from_org_id
 from sushi.models import CounterReportType, SushiFetchAttempt
@@ -111,6 +111,9 @@ class SushiFetchAttemptViewSet(ModelViewSet):
     queryset = SushiFetchAttempt.objects.none()
     http_method_names = ['get', 'post', 'options', 'head']

+    def get_object(self):
+        return super().get_object()
+
     def get_queryset(self):
         organizations = self.request.user.accessible_organizations()
         filter_params = {}
@@ -135,11 +138,26 @@ def get_queryset(self):
         if 'counter_version' in self.request.query_params:
             counter_version = self.request.query_params['counter_version']
             filter_params['credentials__counter_version'] = counter_version
-        return SushiFetchAttempt.objects.filter(**filter_params).select_related(
+        mode = self.request.query_params.get('mode')
+        if mode == 'success_and_current':
+            qs = SushiFetchAttempt.objects.current_or_successful()
+        elif mode == 'current':
+            qs = SushiFetchAttempt.objects.current()
+        else:
+            qs = SushiFetchAttempt.objects.all()
+        return qs.filter(**filter_params).select_related(
             'counter_report', 'credentials__organization'
         )

     def perform_create(self, serializer: SushiFetchAttemptSerializer):
+        # check that the user is allowed to create attempts for this organization
+        credentials = serializer.validated_data['credentials']
+        org_relation = self.request.user.organization_relationship(credentials.organization_id)
+        if org_relation < REL_ORG_ADMIN:
+            raise PermissionDenied(
+                'user is not allowed to start fetch attempts for this organization'
+            )
+        # proceed with creation
         serializer.validated_data['in_progress'] = True
         serializer.validated_data['end_date'] = month_end(serializer.validated_data['end_date'])
         super().perform_create(serializer)
@@ -155,30 +173,41 @@ class SushiFetchAttemptStatsView(APIView):
         'organization': ('credentials__organization', 'credentials__organization__name'),
     }

+    modes = {
+        'current': '',  # only attempts that match the current version of their credentials
+        'success_and_current': '',  # successful attempts plus any others for the current version
+        'all': '',  # all attempts
+    }
+    default_mode = 'current'
+
     key_to_attr_map = {value[1]: key for key, value in attr_to_query_param_map.items()}
     key_to_attr_map.update(
         {value[0]: key + '_id' for key, value in attr_to_query_param_map.items()}
     )
-    success_metrics = ['download_success', 'processing_success', 'contains_data']
+    success_metrics = ['download_success', 'processing_success', 'contains_data', 'is_processed']

     def get(self, request):
         organizations = request.user.accessible_organizations()
-        filter_params = {}
+        filter_params = []
         if 'organization' in request.query_params:
-            filter_params['credentials__organization'] = get_object_or_404(
-                organizations, pk=request.query_params['organization']
+            filter_params.append(
+                Q(
+                    credentials__organization=get_object_or_404(
+                        organizations, pk=request.query_params['organization']
+                    )
+                )
             )
         else:
-            filter_params['credentials__organization__in'] = organizations
+            filter_params.append(Q(credentials__organization__in=organizations))
         if 'platform' in request.query_params:
-            filter_params['credentials__platform_id'] = request.query_params['platform']
+            filter_params.append(Q(credentials__platform_id=request.query_params['platform']))
         if 'date_from' in request.query_params:
             date_from = dateparser.parse(request.query_params['date_from'])
             if date_from:
-                filter_params['timestamp__date__gte'] = date_from
+                filter_params.append(Q(timestamp__date__gte=date_from))
         if 'counter_version' in request.query_params:
             counter_version = request.query_params['counter_version']
-            filter_params['credentials__counter_version'] = counter_version
+            filter_params.append(Q(credentials__counter_version=counter_version))
         # what should be in the result?
         x = request.query_params.get('x', 'report')
         y = request.query_params.get('y', 'platform')
@@ -186,6 +215,22 @@ def get(self, request):
         success_metric = request.query_params.get('success_metric', self.success_metrics[-1])
         if success_metric not in self.success_metrics:
             success_metric = self.success_metrics[-1]
+        # deal with mode - we need to add extra filters for some of the modes
+        mode = request.query_params.get('mode', self.default_mode)
+        if mode not in self.modes:
+            mode = self.default_mode
+        if mode == 'all':
+            # there is nothing to do here
+            pass
+        elif mode == 'current':
+            filter_params.append(Q(credentials_version_hash=F('credentials__version_hash')))
+        elif mode == 'success_and_current':
+            # all successful attempts + any others matching the current version of the credentials
+            filter_params.append(
+                Q(**{success_metric: True})
+                | Q(credentials_version_hash=F('credentials__version_hash'))
+            )
+        # fetch the data - the code differs depending on whether months are requested
         if x != 'month' and y != 'month':
             data = self.get_data_no_months(x, y, filter_params, success_metric)
         else:
@@ -197,7 +242,7 @@ def get(self, request):
             out.append({self.key_to_attr_map.get(key, key): value for key, value in obj.items()})
         return Response(out)

-    def get_data_no_months(self, x, y, filter_params, success_metric):
+    def get_data_no_months(self, x, y, filter_params: [Q], success_metric):
         if x not in self.attr_to_query_param_map:
             return HttpResponseBadRequest('unsupported x dimension: "{}"'.format(x))
         if y not in self.attr_to_query_param_map:
             return HttpResponseBadRequest('unsupported y dimension: "{}"'.format(y))
@@ -209,7 +254,7 @@ def get_data_no_months(self, x, y, filter_params, success_metric):
         values.extend(self.attr_to_query_param_map[y])
         # now get the output
         qs = (
-            SushiFetchAttempt.objects.filter(**filter_params)
+            SushiFetchAttempt.objects.filter(*filter_params)
             .values(*values)
             .annotate(
                 success_count=Count('pk', filter=Q(**{success_metric: True})),
         )
         return qs

-    def get_data_with_months(self, dim, filter_params, success_metric):
+    def get_data_with_months(self, dim, filter_params: [Q], success_metric):
         if dim not in self.attr_to_query_param_map:
             return HttpResponseBadRequest('unsupported dimension: "{}"'.format(dim))
         # we use 2 separate fields for dim in order to preserve both the ID of the
@@ -232,7 +277,7 @@ def get_data_with_months(self, dim, filter_params, success_metric):
         while cur_date < end:
             # now get the output
             for rec in (
-                SushiFetchAttempt.objects.filter(**filter_params)
+                SushiFetchAttempt.objects.filter(*filter_params)
                 .filter(start_date__lte=cur_date, end_date__gte=cur_date)
                 .values(*values)
                 .annotate(
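A short aside on the filter_params change above: a dict of keyword lookups can only express AND-ed conditions, while the new modes need an OR combined with a field-to-field comparison, hence the switch to a list of Q objects. A minimal sketch (not part of the diff) using fields that appear in this patch:

# why the stats view now builds a list of Q objects instead of a kwargs dict
from django.db.models import F, Q

from sushi.models import SushiFetchAttempt

filter_params = [
    Q(credentials__counter_version=5),
    # successful attempts OR attempts whose credentials are still current;
    # F() compares one column against another within the same row
    Q(contains_data=True) | Q(credentials_version_hash=F('credentials__version_hash')),
]
# a list of Q objects is unpacked positionally; a kwargs dict would need **
qs = SushiFetchAttempt.objects.filter(*filter_params)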
diff --git a/config/settings/base.py b/config/settings/base.py
index 66db5667..cbcdb7e5 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -222,6 +222,15 @@
 CELERY_BROKER_URL = 'redis://localhost'
 CELERY_TIMEZONE = TIME_ZONE

+# Note about priorities - it is not clear from the Celery documentation, but from my experiments
+# it looks like:
+# - 0 means "highest" priority - it gets processed first
+# - 9 means "lowest" priority - it gets processed last
+# - the default priority is 0 (or 1 or 2 - these end up in the same list)
+
+CELERY_BROKER_TRANSPORT_OPTIONS = {
+    'queue_order_strategy': 'priority',
+}

 CELERY_TASK_ROUTES = {
     'logs.tasks.sync_interest_task': {'queue': 'interest'},
@@ -281,6 +290,8 @@
 SUSHI_ATTEMPT_LAST_DATE = '2017-01'  # default date where to end fetching sushi data

 REFERENCE_CURRENCY = 'CZK'  # this is the currency used for price calculation

+SENTRY_URL = None
+
 LOGGING = {
     'version': 1,
     'disable_existing_loggers': False,
@@ -302,7 +313,7 @@
 for key in ("SECRET_KEY",):
     locals()[key] = secrets[key]

 # optional keys
-for key in ("ERMS_API_URL",):
+for key in ("ERMS_API_URL", "SENTRY_URL"):
     if key in secrets:
         locals()[key] = secrets[key]
diff --git a/config/settings/production.py b/config/settings/production.py
index 5ba3e773..7a13e569 100644
--- a/config/settings/production.py
+++ b/config/settings/production.py
@@ -1,5 +1,10 @@
 from .base import *

+import sentry_sdk
+from sentry_sdk.integrations.django import DjangoIntegration
+from sentry_sdk.integrations.celery import CeleryIntegration
+from sentry_sdk.integrations.redis import RedisIntegration
+
 ALLOWED_HOSTS = ['stats.czechelib.cz']

 LIVE_ERMS_AUTHENTICATION = True
@@ -12,3 +17,12 @@
 EMAIL_HOST = 'smtp.ntkcz.cz'

 SERVER_EMAIL = 'admin@stats.czechelib.cz'
+
+
+# sentry
+if SENTRY_URL:
+    sentry_sdk.init(
+        dsn=SENTRY_URL,
+        integrations=[DjangoIntegration(), CeleryIntegration(), RedisIntegration()],
+        send_default_pii=True,
+    )
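The transport options added to base.py above only make the Redis broker order its queue by priority; the priority itself is chosen per call. A hypothetical call site, using a task name taken from CELERY_TASK_ROUTES; the real call sites are not part of this diff:

# hypothetical usage, not in this patch: with the redis transport,
# priority 0 is consumed first and 9 last, as the settings comment notes
from logs.tasks import sync_interest_task

sync_interest_task.apply_async(priority=0)  # processed before default-priority tasks
sync_interest_task.apply_async(priority=9)  # processed after everything else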
diff --git a/design/ui/src/components/SushiAttemptListWidget.vue b/design/ui/src/components/SushiAttemptListWidget.vue
index a064f390..673e808b 100644
--- a/design/ui/src/components/SushiAttemptListWidget.vue
+++ b/design/ui/src/components/SushiAttemptListWidget.vue
[template hunks at lines 28, 57 and 144: the Vue markup was lost in extraction; only stray text nodes such as "{{ $t('organization') }}:" survive]
diff --git a/design/ui/src/pages/SushiFetchAttemptsPage.vue b/design/ui/src/pages/SushiFetchAttemptsPage.vue
index dce5e695..b3aebb37 100644
--- a/design/ui/src/pages/SushiFetchAttemptsPage.vue
+++ b/design/ui/src/pages/SushiFetchAttemptsPage.vue
[template hunks at lines 52 and 141: markup lost in extraction; a surviving attribute, :label="$t('success_metric')", suggests the success-metric selector was extended]
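To close, a hedged sketch of how a client might exercise the reworked stats endpoint. The host comes from ALLOWED_HOSTS above, but the '/api/' prefix is an assumption; the parameter names and result keys come from the view and tests in this patch:

# hypothetical request, illustration only
import requests

resp = requests.get(
    'https://stats.czechelib.cz/api/sushi-fetch-attempt-stats/',
    params={
        'x': 'month',
        'y': 'platform',
        'mode': 'success_and_current',  # one of: current, success_and_current, all
        'success_metric': 'is_processed',
    },
)
for row in resp.json():
    print(row['success_count'], row['failure_count'])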