Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/collection api #181

Merged
merged 44 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
268ef37
implement defined namespaces
lukavdplas Jun 25, 2024
f9239fd
start collect app
lukavdplas Jun 20, 2024
9b369aa
draft collection class
lukavdplas Jun 20, 2024
7d3c641
add name/summary fields
lukavdplas Jun 25, 2024
85389d8
add projects and records fields
lukavdplas Jun 25, 2024
72c4c97
add new collectionviewset to router
lukavdplas Jul 1, 2024
ec1b944
get project in collection viewset
lukavdplas Jul 1, 2024
83f0dc3
add name_to_slug function
lukavdplas Jul 1, 2024
010771a
create collection
lukavdplas Jul 1, 2024
5f85a1f
add collections field to project
lukavdplas Jul 2, 2024
f000d65
set project in rdf model
lukavdplas Jul 2, 2024
24643a6
save project uri in database
lukavdplas Jul 2, 2024
4490214
correct saved uri for collections
lukavdplas Jul 2, 2024
3a9e7c2
set lookup value regex
lukavdplas Jul 2, 2024
31e5ab8
implement retrieve endpoint
lukavdplas Jul 2, 2024
5596d1b
outfactor creation function
lukavdplas Jul 2, 2024
a0f0f34
add tests for detail api
lukavdplas Jul 2, 2024
45356fe
check project permissions
lukavdplas Jul 2, 2024
d5d7b3f
implement deleting collections
lukavdplas Jul 2, 2024
ddc6f15
implement update method on collection view
lukavdplas Jul 2, 2024
9cb41be
block creation if collection exists
lukavdplas Jul 3, 2024
12fdc71
add collection serializer class
lukavdplas Jul 3, 2024
b9e3241
create ProjectField serializer field
lukavdplas Jul 3, 2024
84c2a06
use conventional method names get_queryset, get_object
lukavdplas Jul 3, 2024
3f8ff46
implement permission class for detail views
lukavdplas Jul 3, 2024
2026e02
add test for project permission
lukavdplas Jul 3, 2024
4201156
add project validation
lukavdplas Jul 16, 2024
239984b
use ModelViewset
lukavdplas Jul 16, 2024
825b84f
update collection models to prev commit
lukavdplas Jul 16, 2024
f1d5a39
store each collection in its own graph
lukavdplas Jul 16, 2024
aad08e1
add docstrings
lukavdplas Jul 16, 2024
4877e57
lift router to project level
lukavdplas Jul 16, 2024
b2eca55
add records to collection serialiser
lukavdplas Jul 16, 2024
9ac1757
correct function name
lukavdplas Jul 16, 2024
0ece853
remove records from collection endpoint
lukavdplas Jul 17, 2024
40b08fd
Merge branch 'feature/rdf-modelling-utils' into feature/collection-api
lukavdplas Jul 18, 2024
32b217c
set project uri during migration
lukavdplas Jul 18, 2024
0cf37a3
Merge branch 'feature/rdf-modelling-utils' into feature/collection-api
lukavdplas Jul 18, 2024
8394e09
outfactor url function
lukavdplas Jul 19, 2024
f6cee0c
update name_to_slug
lukavdplas Jul 19, 2024
e4dcd62
use single source of truth for project uris
lukavdplas Jul 19, 2024
a98d4df
add docstrings
lukavdplas Jul 19, 2024
e3d5a01
expand test for creating duplicate collection
lukavdplas Jul 19, 2024
0a76240
fix project uri migration
lukavdplas Jul 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added backend/collect/__init__.py
Empty file.
43 changes: 43 additions & 0 deletions backend/collect/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from rest_framework.viewsets import ModelViewSet
from rest_framework.exceptions import NotFound
from rdflib import URIRef, RDF, Graph
from django.conf import settings

from projects.api import user_projects
from collect.rdf_models import EDPOPCollection
from collect.utils import collection_exists, collection_graph
from triplestore.constants import EDPOPCOL
from collect.serializers import CollectionSerializer
from collect.permissions import CollectionPermission

class CollectionViewSet(ModelViewSet):
    '''
    Viewset for listing, retrieving, creating, updating and deleting collections.

    Collections are identified by their URI, which is passed as the lookup
    value in the URL.
    '''

    # the lookup value is a complete URI, so accept any non-empty string
    lookup_value_regex = '.+'
    serializer_class = CollectionSerializer
    permission_classes = [CollectionPermission]

    def get_queryset(self):
        '''
        Return the collections of every project that `user_projects` gives
        for the requesting user.
        '''
        projects = user_projects(self.request.user)
        return [
            EDPOPCollection(collection_graph(uri), uri)
            for project in projects
            for uri in project.rdf_model().collections
        ]

    def get_object(self):
        '''
        Return the collection whose URI is given as the `pk` URL argument.

        Raises NotFound if the collection does not exist, and checks object
        permissions for the request before returning.
        '''
        uri = URIRef(self.kwargs['pk'])

        if not collection_exists(uri):
            raise NotFound('Collection does not exist')

        store = settings.RDFLIB_STORE
        # Use a default so that an empty result gives a 404 response instead of
        # an unhandled StopIteration.
        context = next(
            store.contexts((uri, RDF.type, EDPOPCOL.Collection)), None
        )
        if context is None:
            raise NotFound('Collection does not exist')

        graph = Graph(store, context)
        collection = EDPOPCollection(graph, uri)
        self.check_object_permissions(self.request, collection)
        return collection

129 changes: 129 additions & 0 deletions backend/collect/api_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from django.test import Client
from rest_framework.status import is_success, is_client_error
from rdflib import URIRef, RDF, Literal
from django.conf import settings
from urllib.parse import quote
from typing import Dict

from triplestore.constants import EDPOPCOL, AS
from collect.utils import collection_uri
from projects.models import Project

def example_collection_data(project_name) -> Dict:
    '''
    Build the request payload for an example collection in the given project.
    '''
    payload = dict(
        name='My collection',
        summary='These are my favourite records',
        project=project_name,
    )
    return payload

def post_collection(client, project_name):
    '''
    POST the example collection for the given project as JSON and return the response.
    '''
    return client.post(
        '/api/collections/',
        example_collection_data(project_name),
        content_type='application/json',
    )

def test_create_collection(db, user, project, client: Client):
    '''
    Creating a collection through the API stores it as an EDPOPCOL.Collection.
    '''
    client.force_login(user)
    response = post_collection(client, project.name)
    assert is_success(response.status_code)

    # the returned URI must be typed as a collection in the triplestore
    created_uri = URIRef(response.data['uri'])
    type_triples = settings.RDFLIB_STORE.triples(
        (created_uri, RDF.type, EDPOPCOL.Collection)
    )
    assert any(type_triples)


def test_create_fails_if_collection_exists(db, user, project, client: Client):
    '''
    A create request for a collection that already exists is rejected.
    '''
    client.force_login(user)
    success_response = post_collection(client, project.name)
    assert is_success(success_response.status_code)
    uri = URIRef(success_response.data['uri'])

    # try to create a collection at the same location: same name, different
    # summary. Sent as JSON, like the other requests in this module.
    fail_response = client.post('/api/collections/', {
        'name': 'My collection',
        'summary': 'I like these too',
        'project': project.name
    }, content_type='application/json')
    assert is_client_error(fail_response.status_code)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once database synchronization is added, I would also want to test here that there is no duplicate entry in the database.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My impression was that if you add triples that already exist in blazegraph (in the same graph), it will have no effect. That is, the same triple won't be stored twice. Since the request to make a collection is idempotent, and this test makes the same request twice, I don't think you could check if the second time was executed?

What does make sense to me is to create a different collection in the second request (e.g. with a different description), to check that the create request doesn't store the new data. (This scenario is also why the api should reject the request.)

Correct me if I'm wrong about blazegraph here, though!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right about Blazegraph, but I meant the representation of the collection in the PostgreSQL database. Hence "once database synchronization is added".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see! Though it was my understanding that we plan to only save collections in blazegraph in the future.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It depends on how we administrate which people have (write) access to which collections.


def is_stored(triple):
    # true if at least one triple in the store matches the pattern
    return any(settings.RDFLIB_STORE.triples(triple))

# the summary of the first request is stored, that of the rejected request is not
assert is_stored((uri, AS.summary, Literal('These are my favourite records')))
assert not is_stored((uri, AS.summary, Literal('I like these too')))


def test_list_collections(db, user, project, client: Client):
    '''
    The list endpoint is empty at first and shows a collection once it is created.
    '''
    list_url = '/api/collections/'
    client.force_login(user)

    empty_response = client.get(list_url)
    assert is_success(empty_response.status_code)
    assert len(empty_response.data) == 0

    post_collection(client, project.name)

    filled_response = client.get(list_url)
    assert is_success(filled_response.status_code)
    assert len(filled_response.data) == 1

    listed = filled_response.data[0]
    assert listed['uri'] == settings.RDF_NAMESPACE_ROOT + 'collections/my_collection'
    assert listed['name'] == 'My collection'


def collection_detail_url(collection_uri: str) -> str:
    '''
    Return the detail endpoint path for a collection, with its URI fully percent-encoded.
    '''
    # safe='' so that slashes in the URI are encoded as well
    encoded_uri = quote(collection_uri, safe='')
    return f'/api/collections/{encoded_uri}/'


def test_retrieve_collection(db, user, project, client: Client):
    '''
    Detail requests give 404 for an unknown collection, succeed for the
    logged-in user, and give 403 after logging out.
    '''
    client.force_login(user)
    created = post_collection(client, project.name)

    existing_url = collection_detail_url(created.data['uri'])
    missing_url = collection_detail_url(collection_uri('does not exist'))

    missing = client.get(missing_url)
    assert missing.status_code == 404

    found = client.get(existing_url)
    assert is_success(found.status_code)
    assert found.data['name'] == 'My collection'

    client.logout()
    anonymous = client.get(existing_url)
    assert anonymous.status_code == 403

def test_delete_collection(db, user, project, client: Client):
    '''
    A deleted collection can no longer be retrieved.
    '''
    client.force_login(user)
    created = post_collection(client, project.name)
    url = collection_detail_url(created.data['uri'])

    deleted = client.delete(url)
    assert is_success(deleted.status_code)

    # the same URL now gives "not found"
    assert client.get(url).status_code == 404

def test_update_collection(db, user, project, client: Client):
    '''
    A PUT request with a changed summary returns the updated summary.
    '''
    new_summary = 'I don\'t like these anymore'
    client.force_login(user)

    created = post_collection(client, project.name)
    url = collection_detail_url(created.data['uri'])

    # same payload as the create request, except for the summary
    payload = example_collection_data(project.name)
    payload['summary'] = new_summary

    updated = client.put(url, payload, content_type='application/json')
    assert is_success(updated.status_code)
    assert updated.data['summary'] == new_summary


def test_project_validation(db, user, client: Client):
    '''
    Creating a collection in a project the user was not added to is a client error.
    '''
    client.force_login(user)

    # this project exists, but the user is not one of its users
    Project.objects.create(name='secret', display_name='Top secret records')

    payload = {
        'name': 'new collection',
        'summary': None,
        'project': 'secret',
    }
    response = client.post(
        '/api/collections/', payload, content_type='application/json'
    )

    assert is_client_error(response.status_code)
6 changes: 6 additions & 0 deletions backend/collect/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class CollectConfig(AppConfig):
    '''
    Django app configuration for the collect app.
    '''
    name = 'collect'
    default_auto_field = 'django.db.models.BigAutoField'
20 changes: 20 additions & 0 deletions backend/collect/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
from django.contrib.auth.models import User
from projects.models import Project


@pytest.fixture()
def user(db) -> User:
    '''
    Test user with the username "tester".

    Created with `create_user`, so the password is stored hashed;
    `objects.create` would save the raw string in the password column.
    '''
    return User.objects.create_user(
        username='tester',
        password='secret'
    )

@pytest.fixture()
def project(db, user):
    '''
    Project named "test_project" that has the test user among its users.
    '''
    test_project = Project.objects.create(
        name='test_project', display_name='Test project'
    )
    test_project.users.add(user)
    return test_project
Empty file.
17 changes: 17 additions & 0 deletions backend/collect/permissions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from rest_framework import permissions

from projects.models import Project

class CollectionPermission(permissions.BasePermission):
    '''
    Object-level permission for collections, based on the project of the collection.

    Requests with a safe method need query access to the project; all other
    requests need update access.
    '''

    def has_object_permission(self, request, view, obj):
        # obj.project holds the URI of the project; look up its database entry
        project = Project.objects.get(uri=obj.project)
        read_only = request.method in permissions.SAFE_METHODS
        check = project.permit_query_by if read_only else project.permit_update_by
        return check(request.user)
39 changes: 39 additions & 0 deletions backend/collect/rdf_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from rdflib import RDFS, IdentifiedNode, URIRef
from typing import Iterable

from triplestore.utils import Triples
from triplestore.constants import EDPOPCOL, AS
from triplestore.rdf_model import RDFModel
from triplestore.rdf_field import RDFField, RDFUniquePropertyField


class CollectionMembersField(RDFField):
    '''
    Field for the members of a collection: the subjects of the `RDFS.member`
    triples that have the collection as their object.
    '''

    def get(self, instance: RDFModel):
        # only the subject of each triple is a member
        return [
            member
            for member, _predicate, _collection in self._stored_triples(instance)
        ]

    def _stored_triples(self, instance: RDFModel) -> Triples:
        graph = self.get_graph(instance)
        pattern = (None, RDFS.member, instance.uri)
        return graph.triples(pattern)

    def _triples_to_store(self, instance: RDFModel, value: Iterable[IdentifiedNode]) -> Triples:
        return [
            (member, RDFS.member, instance.uri)
            for member in value
        ]


class EDPOPCollection(RDFModel):
    '''
    RDF model for EDPOP collections.

    A collection has a name, a summary, the project it belongs to, and a list
    of member records.
    '''
    rdf_class = EDPOPCOL.Collection

    # name of the collection, stored with the AS.name property
    name = RDFUniquePropertyField(AS.name)
    # summary of the collection, stored with the AS.summary property
    summary = RDFUniquePropertyField(AS.summary)
    # URI of the project that the collection belongs to, stored with AS.context
    project = RDFUniquePropertyField(AS.context)
    # members: subjects of RDFS.member triples that point at the collection
    records = CollectionMembersField()
lukavdplas marked this conversation as resolved.
Show resolved Hide resolved
38 changes: 38 additions & 0 deletions backend/collect/rdf_models_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest
from rdflib import URIRef, RDF, RDFS
from django.conf import settings

from triplestore.constants import AS, EDPOPCOL
from projects.models import Project
from projects.rdf_models import RDFProject
from collect.rdf_models import EDPOPCollection

@pytest.fixture()
def project(db):
    '''
    RDF model of a newly created project named "test".
    '''
    db_project = Project.objects.create(name='test', display_name='Test')
    return RDFProject(db_project.graph(), db_project.identifier())

def test_collection_model(project):
    '''
    Saving a collection stores its type, project and members; deleting it
    removes them again.
    '''
    uri = URIRef('test-collection', base='https://test.org/collections/')
    members = [
        URIRef('https://example.org/example1'),
        URIRef('https://example.org/example2')
    ]

    collection = EDPOPCollection(project.graph, uri)
    collection.name = 'Test collection'
    collection.project = project.uri
    collection.records = members
    collection.save()

    store = settings.RDFLIB_STORE
    # triple patterns that must match after saving and must not match after deleting
    patterns = [
        (collection.uri, RDF.type, EDPOPCOL.Collection),
        (collection.uri, AS.context, project.uri),
        (None, RDFS.member, collection.uri),
    ]

    for pattern in patterns:
        assert any(store.triples(pattern))

    collection.delete()

    for pattern in patterns:
        assert not any(store.triples(pattern))
74 changes: 74 additions & 0 deletions backend/collect/serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from rest_framework import serializers
from rdflib import URIRef

from collect.rdf_models import EDPOPCollection
from collect.utils import collection_uri, collection_exists, collection_graph
from projects.models import Project


class ProjectField(serializers.Field):
    '''
    Serializer field for the project of a collection.

    The API represents a project by its name; internally the value is the URI
    of the project.
    '''

    def to_internal_value(self, data):
        '''
        Convert a project name to the URI of that project.

        Raises a ValidationError if no project has this name, so that the
        client receives a validation error rather than an unhandled
        `Project.DoesNotExist`.
        '''
        try:
            project = Project.objects.get(name=data)
        except Project.DoesNotExist:
            raise serializers.ValidationError('Project does not exist') from None
        return URIRef(project.uri)

    def to_representation(self, value):
        '''
        Convert a project URI to the name of that project.
        '''
        project = Project.objects.get(uri=str(value))
        return project.name


def can_update_project(data):
    '''
    Validator: the submitted project must be one that the user may write to.

    This is not the same check as CollectionPermission. That permission looks at
    the collection in its current context; this validator looks at the data that
    the user submitted, so that users cannot add collections to projects they
    cannot access.

    Raises a ValidationError if the user has no update permission on the project.
    '''
    project_uri, user = data['project'], data['user']

    target = Project.objects.get(uri=str(project_uri))
    if target.permit_update_by(user):
        return

    raise serializers.ValidationError(
        'No permission to write to this project'
    )


class CollectionSerializer(serializers.Serializer):
    '''
    Serializer for EDPOP collections.

    The URI of a collection is read-only: it is derived from the name when the
    collection is created.
    '''
    name = serializers.CharField(max_length=128)
    summary = serializers.CharField(
        max_length=1024, required=False, allow_null=True, default=None
    )
    project = ProjectField()
    uri = serializers.URLField(read_only=True)
    # not submitted by the client; gives validators access to the requesting user
    user = serializers.HiddenField(default=serializers.CurrentUserDefault())

    class Meta:
        validators = [can_update_project]

    def create(self, validated_data):
        '''
        Create and save a new collection.

        Raises a ValidationError if a collection already exists at the URI
        derived from the name.
        '''
        project_uri = validated_data['project']
        uri = collection_uri(validated_data['name'])
        graph = collection_graph(uri)

        if collection_exists(uri):
            raise serializers.ValidationError(f'Collection {uri} already exists')

        collection = EDPOPCollection(graph, uri)
        collection.name = validated_data['name']
        collection.summary = validated_data['summary']
        collection.project = project_uri
        collection.save()
        return collection

    def update(self, instance: EDPOPCollection, validated_data):
        '''
        Update and save an existing collection; the URI is not changed.

        Only fields present in the validated data are assigned, so an omitted
        field keeps its stored value instead of raising a KeyError here.
        '''
        for field_name in ('name', 'summary', 'project'):
            if field_name in validated_data:
                setattr(instance, field_name, validated_data[field_name])
        instance.save()
        return instance
Loading