
Bump lxml from 4.5.2 to 4.6.2 in /requirements #7

Open
wants to merge 22 commits into master
Commits
f482831
add storage of credentials version into SushiFetchAttempt - refs #44
beda42 May 22, 2020
8288aa2
add version_hash attribute to the SushiCredentials object
beda42 May 25, 2020
55790f9
fix hash computation in SushiCredentials migration
beda42 May 25, 2020
c9e2457
implement utility function for finding holes in Counter data - naive …
beda42 May 26, 2020
cb6b464
add code to retry holes in data in case the credentials have been upd…
beda42 Jun 2, 2020
00b59f1
implement sushi retry after sushi credentials have changed using cele…
beda42 Jun 2, 2020
ae59421
black reformat one file
beda42 Jul 27, 2020
52c2881
implement history-mode for fetching and displaying sushifetchattemps
beda42 Jul 24, 2020
1e4bdb8
fix integrity error during import of titles with whitespace in ISBN -…
beda42 Jul 24, 2020
d6de448
add history mode filter to sushifetchattempt admin; implement custom …
beda42 Jul 26, 2020
e818972
Fix filename generation for sushi fetch attempts (internal_id=None)
shenek Jun 30, 2020
9d0813a
make organization.ext_id nullable
beda42 Jun 25, 2020
480acc3
integrate sentry into production settings
beda42 Jul 27, 2020
db41e54
modify the credentials_version_hash migration to create hashes for re…
beda42 Jul 27, 2020
f95c555
fix new sushifetchattempts not being accessible through the API as th…
beda42 Jul 29, 2020
1b8a167
add missing pytest fixture; fix the sushifetchattempt api test for th…
beda42 Jul 29, 2020
f7a94c4
properly disconnect user and organization when it happens in ERMS - r…
beda42 Sep 8, 2020
ae0d096
build test image only on master branch
shenek Jun 5, 2020
f8ddcd5
improve coverage report by excluding test files
shenek Jun 4, 2020
af7663a
Bump notebook from 6.0.3 to 6.1.5 in /requirements
dependabot[bot] Nov 18, 2020
5866bfe
Merge pull request #5 from techlib/dependabot/pip/requirements/notebo…
beda42 Nov 19, 2020
ea3b603
Bump lxml from 4.5.2 to 4.6.2 in /requirements
dependabot[bot] Jan 7, 2021
2 changes: 2 additions & 0 deletions .coveragerc
@@ -9,3 +9,5 @@ omit =
*/management/*
*/tests.py
*/experiments.py
*/conftest.py
*/tests/*
4 changes: 4 additions & 0 deletions .gitlab/test-image.yml
@@ -9,5 +9,9 @@ test_image:
- docker build --cache-from $CI_REGISTRY_IMAGE/celus-test:latest --tag $CI_REGISTRY_IMAGE/celus-test:latest --target celus-test -f ci/Dockerfile .
- docker push $CI_REGISTRY_IMAGE/celus-test:latest

rules:
- if: '$CI_COMMIT_BRANCH == "master"'
when: on_success

tags:
- dind
3 changes: 0 additions & 3 deletions apps/activity/tests.py

This file was deleted.

9 changes: 9 additions & 0 deletions apps/core/logic/sync.py
@@ -97,6 +97,7 @@ def sync_data(self, records: [dict]) -> dict:
)
}
org_ext_id_to_db_obj = {org.ext_id: org for org in Organization.objects.all()}
seen_user_orgs = set()
for (org_ext_id, user_ext_id), is_admin in self._org_user_status.items():
uo = org_user_to_db_obj.get((org_ext_id, user_ext_id))
if not uo:
@@ -116,6 +117,7 @@ def sync_data(self, records: [dict]) -> dict:
user=user, organization=organization, is_admin=is_admin, source=self.data_source
)
org_user_to_db_obj[(org_ext_id, user_ext_id)] = uo
seen_user_orgs.add(uo.pk)
stats['User-Org created'] += 1
else:
if uo.is_admin != is_admin:
@@ -124,6 +126,13 @@ def sync_data(self, records: [dict]) -> dict:
stats['User-Org synced'] += 1
else:
stats['User-Org unchanged'] += 1
seen_user_orgs.add(uo.pk)
deleted, _details = (
UserOrganization.objects.filter(source=self.data_source)
.exclude(pk__in=seen_user_orgs)
.delete()
)
stats['User-Org deleted'] = deleted
return stats


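The sync change above follows a mark-and-sweep pattern: every UserOrganization link created or confirmed during the run has its primary key recorded in seen_user_orgs, and afterwards all links from the same data source that were not seen are deleted. A minimal standalone sketch of the same idea, assuming a Django model with user, organization, is_admin and source fields (the helper name and call signature are illustrative, not the project's exact API):

```python
# Sketch of the "collect seen PKs, then delete stale rows" sync pattern.
from organizations.models import UserOrganization


def sync_user_org_links(data_source, desired_links):
    """desired_links: iterable of (user, organization, is_admin) tuples."""
    seen_pks = set()
    for user, organization, is_admin in desired_links:
        # create the link if missing, update is_admin if it changed
        uo, _created = UserOrganization.objects.update_or_create(
            user=user,
            organization=organization,
            source=data_source,
            defaults={'is_admin': is_admin},
        )
        seen_pks.add(uo.pk)
    # anything from this source that was not part of the input is stale
    deleted, _details = (
        UserOrganization.objects.filter(source=data_source)
        .exclude(pk__in=seen_pks)
        .delete()
    )
    return deleted
```

Recording the PK in both the "created" and the "unchanged/synced" branches, as the diff does, is what keeps existing links from being swept away.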
28 changes: 28 additions & 0 deletions apps/core/tests/test_sync.py
@@ -6,6 +6,7 @@
from core.logic.sync import sync_identities
from core.models import Identity, DataSource
from erms.sync import ERMSObjectSyncer
from organizations.models import Organization, UserOrganization
from ..logic.sync import sync_users


@@ -66,6 +67,33 @@ def test_sync_users_with_removal(self, data_source):
assert stats['removed'][0] == 1
assert User.objects.count() == 2

@pytest.mark.now
def test_sync_users_with_user_org_link_removal(self, data_source):
"""
Test that after a user gets removed from an organization, the link is properly removed
"""
User = get_user_model()
assert User.objects.count() == 0
Organization.objects.create(ext_id=10, name='Org 1', short_name='org_1')
Organization.objects.create(ext_id=100, name='Org 2', short_name='org_2')
input_data = [
{'id': 1, 'vals': {'name@cs': ['John Doe']}, 'refs': {'employee of': [10, 100]}},
]
stats = sync_users(data_source, input_data)
assert stats['removed'][0] == 0
assert stats[ERMSObjectSyncer.Status.NEW] == 1
assert User.objects.count() == 1
assert UserOrganization.objects.count() == 2
# now remove one of the organizations
input_data[0]['refs']['employee of'].remove(10)
stats = sync_users(data_source, input_data)
assert stats[ERMSObjectSyncer.Status.NEW] == 0
assert stats[ERMSObjectSyncer.Status.UNCHANGED] == 1
assert stats['removed'][0] == 0
assert stats['User-Org deleted'] == 1
assert User.objects.count() == 1
assert UserOrganization.objects.count() == 1


@pytest.mark.django_db
class TestIdentitySync(object):
62 changes: 31 additions & 31 deletions apps/logs/logic/data_import.py
@@ -4,7 +4,7 @@
from typing import Optional

from core.logic.debug import log_memory
from logs.logic.validation import clean_and_validate_issn, ValidationError
from logs.logic.validation import clean_and_validate_issn, ValidationError, normalize_isbn
from logs.models import ImportBatch
from nigiri.counter5 import CounterRecord
from organizations.models import Organization
@@ -31,14 +31,7 @@ def get_or_create_with_map(model, mapping, attr_name, attr_value, other_attrs=No

class TitleManager(object):
def __init__(self):
# in the following, we use values_list to speed things up as there are a lot of objects
# and creating them takes a lot of time
# (e.g. processing time for import was cut from 3.5s to 1.2s by switching to this)
self.key_to_title_id_and_pub_type = {}
# tuple(t[:5]): tuple(t[5:])
# for t in Title.objects.all().order_by().
# values_list('name', 'isbn', 'issn', 'eissn', 'doi', 'pk', 'pub_type')
# }
self.stats = Counter()

def prefetch_titles(self, records: [TitleRec]):
@@ -54,6 +47,29 @@ def prefetch_titles(self, records: [TitleRec]):
}
logger.debug('Prefetched %d records', len(self.key_to_title_id_and_pub_type))

@classmethod
def normalize_title_rec(cls, record: TitleRec) -> TitleRec:
"""
Normalize specific fields in the record and return a new TitleRec with normalized data.
Should be run before one attempts to ingest the data into the database.
"""
# normalize issn, eissn and isbn - they are sometimes malformed by whitespace in the data
issn = record.issn
if issn:
issn = clean_and_validate_issn(issn, raise_error=False)
eissn = record.eissn
if eissn:
eissn = clean_and_validate_issn(eissn, raise_error=False)
isbn = normalize_isbn(record.isbn) if record.isbn else record.isbn
return TitleRec(
name=record.name,
isbn=isbn,
issn=issn,
eissn=eissn,
doi=record.doi,
pub_type=record.pub_type,
)

def get_or_create(self, record: TitleRec) -> Optional[int]:
if not record.name:
logger.warning(
@@ -64,23 +80,7 @@ def get_or_create(self, record: TitleRec) -> Optional[int]:
record.doi,
)
return None
# normalize issn, eissn and isbn - the are sometimes malformed by whitespace in the data
issn = record.issn
if issn:
try:
issn = clean_and_validate_issn(issn)
except ValidationError as e:
logger.error(f'Error: {e}')
issn = ''
eissn = record.eissn
if eissn:
try:
eissn = clean_and_validate_issn(eissn)
except ValidationError as e:
logger.error(f'Error: {e}')
eissn = ''
isbn = record.isbn.replace(' ', '') if record.isbn else record.isbn
key = (record.name, isbn, issn, eissn, record.doi)
key = (record.name, record.isbn, record.issn, record.eissn, record.doi)
if key in self.key_to_title_id_and_pub_type:
title_pk, db_pub_type = self.key_to_title_id_and_pub_type[key]
# check if we need to improve the pub_type from UNKNOWN to something better
@@ -94,9 +94,9 @@ def get_or_create(self, record: TitleRec) -> Optional[int]:
title = Title.objects.create(
name=record.name,
pub_type=record.pub_type,
isbn=isbn,
issn=issn,
eissn=eissn,
isbn=record.isbn,
issn=record.issn,
eissn=record.eissn,
doi=record.doi,
)
self.key_to_title_id_and_pub_type[key] = (title.pk, record.pub_type)
@@ -113,11 +113,11 @@ def counter_record_to_title_rec(self, record: CounterRecord) -> TitleRec:
if key == 'DOI':
doi = value
elif key == 'Online_ISSN':
eissn = value
eissn = clean_and_validate_issn(value, raise_error=False) if value else value
elif key == 'Print_ISSN':
issn = value
issn = clean_and_validate_issn(value, raise_error=False) if value else value
elif key == 'ISBN':
isbn = value
isbn = normalize_isbn(value) if value else value
pub_type = self.deduce_pub_type(eissn, isbn, issn, record)
# convert None values for the following attrs to empty strings
isbn = '' if isbn is None else isbn
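The refactor above pulls ISSN/ISBN normalization out of get_or_create and into the normalize_title_rec classmethod, so the prefetch cache, the lookup key and newly created rows all use the same normalized values. A sketch of the intended call order (the ingest_title_records helper is illustrative, not part of the codebase):

```python
# Sketch: normalize first, so that the prefetch cache and the lookup keys agree.
from logs.logic.data_import import TitleManager, TitleRec


def ingest_title_records(raw_records):
    tm = TitleManager()
    records = [TitleManager.normalize_title_rec(rec) for rec in raw_records]
    tm.prefetch_titles(records)
    return [tm.get_or_create(rec) for rec in records]


# an ISBN with stray whitespace resolves to the same title as its normalized form
ingest_title_records(
    [TitleRec(name='Foo', isbn='978- 0-07-174521-5', issn='', eissn='', doi='', pub_type='U')]
)
```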
13 changes: 11 additions & 2 deletions apps/logs/logic/validation.py
@@ -1,5 +1,7 @@
import logging
import re

logger = logging.getLogger(__name__)
issn_matcher = re.compile(r'^\d{4}-\d{3}[\dX]$')


@@ -8,8 +10,15 @@ class ValidationError(Exception):
pass


def clean_and_validate_issn(text: str) -> str:
def clean_and_validate_issn(text: str, raise_error=True) -> str:
clean = ''.join(text.split()) # remove all whitespace
if issn_matcher.match(clean):
return clean
raise ValidationError(f'Invalid ISSN: "{text}"')
if raise_error:
raise ValidationError(f'Invalid ISSN: "{text}"')
logger.warning('Invalid ISSN: "%s"', text)
return ''


def normalize_isbn(isbn: str) -> str:
return isbn.replace(' ', '')
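With the new raise_error flag, callers can pick between the strict behaviour (raise ValidationError on a malformed ISSN) and the lenient one used by the import path (log a warning and return an empty string). A short usage sketch; the sample values are made up:

```python
from logs.logic.validation import ValidationError, clean_and_validate_issn, normalize_isbn

# lenient mode: whitespace is stripped, malformed input collapses to ''
assert clean_and_validate_issn('1234 -5678', raise_error=False) == '1234-5678'
assert clean_and_validate_issn('not-an-issn', raise_error=False) == ''

# strict mode (default): malformed input raises ValidationError
try:
    clean_and_validate_issn('not-an-issn')
except ValidationError:
    print('invalid ISSN rejected')

# ISBN normalization only removes spaces
assert normalize_isbn('978- 0-07-174521-5') == '978-0-07-174521-5'
```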
4 changes: 2 additions & 2 deletions apps/nigiri/tests/test_fetching.py
@@ -3,13 +3,13 @@
import pytest

from nigiri.counter5 import Counter5ReportBase
from organizations.tests.conftest import organizations
from logs.tests.conftest import report_type_nd

from nigiri.client import Sushi5Client
from publications.models import Platform
from sushi.logic.data_import import import_sushi_credentials
from sushi.models import SushiCredentials, CounterReportType
from organizations.tests.conftest import organizations
from logs.tests.conftest import report_type_nd


@pytest.mark.django_db
20 changes: 20 additions & 0 deletions apps/organizations/migrations/0015_nullable_ext_id.py
@@ -0,0 +1,20 @@
# Generated by Django 2.2.13 on 2020-06-25 07:56

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('organizations', '0014_nullable_ico'),
]

operations = [
migrations.AlterField(
model_name='organization',
name='ext_id',
field=models.PositiveIntegerField(
default=None, help_text='object ID taken from EMRS', null=True, unique=True
),
),
]
5 changes: 3 additions & 2 deletions apps/organizations/models.py
@@ -1,6 +1,5 @@
from django.conf import settings
from django.contrib.postgres.fields import JSONField
from django.core.exceptions import ValidationError
from django.db import models
from mptt.fields import TreeForeignKey
from mptt.models import MPTTModel
@@ -10,7 +9,9 @@

class Organization(MPTTModel):

ext_id = models.PositiveIntegerField(unique=True, help_text='object ID taken from EMRS')
ext_id = models.PositiveIntegerField(
unique=True, help_text='object ID taken from EMRS', null=True, default=None
)
parent = TreeForeignKey(
'self', on_delete=models.CASCADE, null=True, blank=True, related_name='children'
)
2 changes: 1 addition & 1 deletion apps/organizations/tests/conftest.py
@@ -51,7 +51,7 @@ def identity_by_user_type(
):
def fn(user_type):
org = organizations[0]
# we do not user admin_client, master_client, etc. because the way the fixtures work
# we do not use admin_client, master_client, etc. because the way the fixtures work
# they all point to the same client object which obviously does not work
if user_type == 'no_user':
identity = None
2 changes: 1 addition & 1 deletion apps/organizations/tests/test_api.py
@@ -15,7 +15,7 @@
class TestOrganizationAPI(object):
def test_unauthorized_user(self, client, invalid_identity, authentication_headers):
resp = client.get(reverse('organization-list'), **authentication_headers(invalid_identity))
assert resp.status_code == 403
assert resp.status_code in (403, 401) # depends on auth backend

def test_authorized_user_no_orgs(self, authenticated_client):
resp = authenticated_client.get(reverse('organization-list'))
23 changes: 23 additions & 0 deletions apps/publications/tests/test_title_manager.py
@@ -0,0 +1,23 @@
import pytest

from logs.logic.data_import import TitleManager, TitleRec
from publications.models import Title


@pytest.mark.django_db
class TestTitleManager(object):
def test_mangled_isbn(self):
"""
Test for a bug where TitleManager looks up existing titles in the database using a
non-normalized ISBN but stores new titles with the normalized ISBN. This discrepancy
may lead to a database-level integrity error because of the unique constraints.
"""
Title.objects.create(name='Foo', isbn='978-0-07-174521-5')
tm = TitleManager()
record = TitleRec(
name='Foo', isbn='978- 0-07-174521-5', issn='', eissn='', doi='', pub_type='U'
)
record = tm.normalize_title_rec(record)
tm.prefetch_titles(records=[record])
tm.get_or_create(record)
19 changes: 19 additions & 0 deletions apps/sushi/admin.py
@@ -79,6 +79,24 @@ def queryset(self, request, queryset):
return queryset


class HistoryMode(admin.SimpleListFilter):
title = 'history mode'
parameter_name = 'hm'

def lookups(self, request, model_admin):
return (
('current', 'Current only'),
('current_and_success', 'Current and successful'),
)

def queryset(self, request, queryset):
if self.value() == 'current':
return queryset.current()
if self.value() == 'current_and_success':
return queryset.current_or_successful()
return queryset


@admin.register(models.SushiFetchAttempt)
class SushiFetchAttemptAdmin(admin.ModelAdmin):

@@ -100,6 +118,7 @@ class SushiFetchAttemptAdmin(admin.ModelAdmin):
'has_import_batch',
]
list_filter = [
HistoryMode,
'download_success',
'processing_success',
'is_processed',
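The HistoryMode filter relies on the SushiFetchAttempt queryset exposing current() and current_or_successful() helpers, which are not part of this diff. A hedged sketch of what such a custom queryset could look like; the filter conditions and field names (credentials_version_hash, credentials__version_hash) are assumptions based on the commit messages, not verified against the models:

```python
# Sketch only: a custom queryset providing the helpers used by the HistoryMode admin filter.
from django.db import models


class SushiFetchAttemptQuerySet(models.QuerySet):
    def current(self):
        # attempts made with the credentials version that is still in use
        return self.filter(credentials_version_hash=models.F('credentials__version_hash'))

    def current_or_successful(self):
        # current attempts plus older ones that nevertheless succeeded
        return self.current() | self.filter(processing_success=True)


# wired up on the model via: objects = SushiFetchAttemptQuerySet.as_manager()
```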