From 2926ca561d217602b978304559fd079dc12f593b Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Tue, 14 May 2024 10:34:07 +0400 Subject: [PATCH 1/4] pre-compute number of citations for a work --- .../migrations/0129_work_citation_counts.py | 46 +++++++++++++++++++ .../0130_backfill_citation_counts.py | 29 ++++++++++++ peachjam/models/citations.py | 10 +++- peachjam/models/core_document_model.py | 8 +++- peachjam/signals.py | 16 ++++++- 5 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 peachjam/migrations/0129_work_citation_counts.py create mode 100644 peachjam/migrations/0130_backfill_citation_counts.py diff --git a/peachjam/migrations/0129_work_citation_counts.py b/peachjam/migrations/0129_work_citation_counts.py new file mode 100644 index 000000000..1e6c93507 --- /dev/null +++ b/peachjam/migrations/0129_work_citation_counts.py @@ -0,0 +1,46 @@ +# Generated by Django 3.2.25 on 2024-05-14 06:18 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("peachjam", "0128_courtclass_show_listing_page"), + ] + + operations = [ + migrations.AddField( + model_name="work", + name="n_cited_works", + field=models.IntegerField(default=0, verbose_name="number of cited works"), + ), + migrations.AddField( + model_name="work", + name="n_citing_works", + field=models.IntegerField( + default=0, verbose_name="number of incoming citations" + ), + ), + migrations.AlterField( + model_name="extractedcitation", + name="citing_work", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="outgoing_citations", + to="peachjam.work", + verbose_name="citing work", + ), + ), + migrations.AlterField( + model_name="extractedcitation", + name="target_work", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="incoming_citations", + to="peachjam.work", + verbose_name="target work", + ), + ), + ] diff --git a/peachjam/migrations/0130_backfill_citation_counts.py b/peachjam/migrations/0130_backfill_citation_counts.py new file mode 100644 index 000000000..720800f01 --- /dev/null +++ b/peachjam/migrations/0130_backfill_citation_counts.py @@ -0,0 +1,29 @@ +# Generated by Django 3.2.25 on 2024-05-14 06:19 + +from django.db import migrations +from django.db.models import Q + + +def backfill_citation_counts(apps, schema_editor): + Work = apps.get_model("peachjam", "Work") + ExtractedCitation = apps.get_model("peachjam", "ExtractedCitation") + + qs = Work.objects.filter( + Q(outgoing_citations__isnull=False) | Q(incoming_citations__isnull=False) + ) + for work in qs.order_by("-pk").distinct("pk").iterator(256): + work.n_cited_works = ExtractedCitation.objects.filter(citing_work=work).count() + work.n_citing_works = ExtractedCitation.objects.filter(target_work=work).count() + if work.n_cited_works or work.n_citing_works: + work.save(update_fields=["n_cited_works", "n_citing_works"]) + + +class Migration(migrations.Migration): + + dependencies = [ + ("peachjam", "0129_work_citation_counts"), + ] + + operations = [ + migrations.RunPython(backfill_citation_counts, migrations.RunPython.noop) + ] diff --git a/peachjam/models/citations.py b/peachjam/models/citations.py index 03aa84766..8a214a705 100644 --- a/peachjam/models/citations.py +++ b/peachjam/models/citations.py @@ -73,14 +73,14 @@ class ExtractedCitation(models.Model): "peachjam.Work", null=False, on_delete=models.CASCADE, - related_name="citing_work", + related_name="outgoing_citations", verbose_name=_("citing work"), ) target_work = models.ForeignKey( "peachjam.Work", null=False, on_delete=models.CASCADE, - related_name="target_work", + related_name="incoming_citations", verbose_name=_("target work"), ) @@ -100,6 +100,12 @@ def for_target_works(cls, work): .order_by("citing_work__title") ) + @classmethod + def update_counts_for_work(cls, work): + work.n_cited_works = cls.for_citing_works(work).count() + work.n_citing_works = cls.for_target_works(work).count() + work.save(update_fields=["n_cited_works", "n_citing_works"]) + class CitationProcessing(SingletonModel): processing_date = models.DateField(_("processing date"), null=True, blank=True) diff --git a/peachjam/models/core_document_model.py b/peachjam/models/core_document_model.py index 07e005875..48b67b134 100644 --- a/peachjam/models/core_document_model.py +++ b/peachjam/models/core_document_model.py @@ -169,6 +169,10 @@ class Work(models.Model): ) # the rank (weight) of this work in the graph network, computer by peachjam.graph.ranker ranking = models.FloatField(_("ranking"), null=True, blank=False, default=0.0) + # number of outgoing citations + n_cited_works = models.IntegerField(_("number of cited works"), default=0) + # number of incoming citations + n_citing_works = models.IntegerField(_("number of incoming citations"), default=0) class Meta: verbose_name = _("work") @@ -217,11 +221,11 @@ def fetch_cited_works_frbr_uris(self): return work_frbr_uris def cited_works(self): - """Return Shows a list of works cited by the current work.""" + """Returns a list of works cited by the current work.""" return ExtractedCitation.for_citing_works(self).values("target_work") def works_citing_current_work(self): - """Shows a list of works that cite the current work.""" + """Returns a list of works that cite the current work.""" return ExtractedCitation.for_target_works(self).values("citing_work") def save(self, *args, **kwargs): diff --git a/peachjam/signals.py b/peachjam/signals.py index 86f3612a5..c1cad0d95 100644 --- a/peachjam/signals.py +++ b/peachjam/signals.py @@ -1,7 +1,7 @@ from django.db.models import signals from django.dispatch import receiver -from peachjam.models import CoreDocument, SourceFile, Work +from peachjam.models import CoreDocument, ExtractedCitation, SourceFile, Work from peachjam.tasks import update_extracted_citations_for_a_work @@ -41,3 +41,17 @@ def convert_to_pdf(sender, instance, created, **kwargs): """Convert a source file to PDF when it's saved""" if created: instance.ensure_file_as_pdf() + + +@receiver(signals.post_save, sender=ExtractedCitation) +def extracted_citation_saved(sender, instance, **kwargs): + """Update citation counts on works.""" + ExtractedCitation.update_counts_for_work(instance.citing_work) + ExtractedCitation.update_counts_for_work(instance.target_work) + + +@receiver(signals.post_delete, sender=ExtractedCitation) +def extracted_citation_deleted(sender, instance, **kwargs): + """Update citation counts on works.""" + ExtractedCitation.update_counts_for_work(instance.citing_work) + ExtractedCitation.update_counts_for_work(instance.target_work) From f81af1bbb9524fc61f49e1628cbb0a2caa380067 Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Tue, 14 May 2024 11:02:04 +0400 Subject: [PATCH 2/4] improve calculation and display of citations --- peachjam/templates/peachjam/_citations.html | 8 +++--- .../templates/peachjam/_citations_list.html | 27 ++++++++++--------- .../peachjam/_citations_list_items.html | 18 +++++++++++++ .../peachjam/layouts/document_detail.html | 2 +- peachjam/views/generic_views.py | 24 +++++++---------- 5 files changed, 46 insertions(+), 33 deletions(-) create mode 100644 peachjam/templates/peachjam/_citations_list_items.html diff --git a/peachjam/templates/peachjam/_citations.html b/peachjam/templates/peachjam/_citations.html index aceb7fe67..10108eaf5 100644 --- a/peachjam/templates/peachjam/_citations.html +++ b/peachjam/templates/peachjam/_citations.html @@ -1,16 +1,16 @@ {% load peachjam i18n %}
-
+

- {% translate 'Cited documents' %} {{ cited_documents_count }} + {% translate 'Cited documents' %} {{ document.work.n_cited_works }}

{% include 'peachjam/_citations_list.html' with citations=cited_documents group="outgoing" %}
-
+

- {% translate 'Documents citing this one' %} {{ documents_citing_current_doc_count }} + {% translate 'Documents citing this one' %} {{ document.work.n_citing_works }}

{% include 'peachjam/_citations_list.html' with citations=documents_citing_current_doc group="incoming" %}
diff --git a/peachjam/templates/peachjam/_citations_list.html b/peachjam/templates/peachjam/_citations_list.html index 003ad8a5f..ee91d2863 100644 --- a/peachjam/templates/peachjam/_citations_list.html +++ b/peachjam/templates/peachjam/_citations_list.html @@ -1,22 +1,23 @@ {% load peachjam i18n %} {% for item in citations %} -
+
{{ item.doc_type }} {{ item.docs|length }}
-
    - {% for doc in item.docs|slice:":10" %} -
  1. - {{ doc.title }} -
  2. - {% endfor %} + + + + + + + + {% include 'peachjam/_citations_list_items.html' with start=0 docs=item.docs|slice:":10" %} + {% if item.docs|length > 10 %} - {% for doc in item.docs|slice:"10:" %} -
  3. - {{ doc.title }} -
  4. - {% endfor %} + + {% include 'peachjam/_citations_list_items.html' with start=10 docs=item.docs|slice:"10:" %} + {% endif %} - +
    {% if item.docs|length > 10 %} {% endif %} diff --git a/peachjam/views/generic_views.py b/peachjam/views/generic_views.py index 0be56bee4..102b1404c 100644 --- a/peachjam/views/generic_views.py +++ b/peachjam/views/generic_views.py @@ -177,26 +177,17 @@ def get_context_data(self, **kwargs): context["notices"] = self.get_notices() context["taxonomies"] = doc.taxonomies.prefetch_related("topic") + context["labels"] = doc.labels.all() - context["cited_documents"] = self.fetch_docs(doc.work.cited_works()) - context["documents_citing_current_doc"] = self.fetch_docs( + # citations + context["cited_documents"] = self.fetch_citation_docs(doc.work.cited_works()) + context["documents_citing_current_doc"] = self.fetch_citation_docs( doc.work.works_citing_current_work() ) - context["cited_documents_count"] = sum( - [len(doc["docs"]) for doc in context["cited_documents"]] - ) - context["documents_citing_current_doc_count"] = sum( - [len(doc["docs"]) for doc in context["documents_citing_current_doc"]] - ) - context["number_of_extracted_citations"] = ( - context["cited_documents_count"] - + context["documents_citing_current_doc_count"] - ) - context["labels"] = doc.labels.all() return context - def fetch_docs(self, works): + def fetch_citation_docs(self, works): docs = sorted( list( CoreDocument.objects.prefetch_related("work") @@ -214,7 +205,10 @@ def fetch_docs(self, works): result = [ { "doc_type": doc_type, - "docs": sorted(list(group), key=lambda d: d.title), + # sort by citations descending, then title + "docs": sorted( + list(group), key=lambda d: [-d.work.n_citing_works, d.title] + ), } for doc_type, group in grouped_docs ] From 57321ea1d550a55361e5af52b43d67f7d2a31bd4 Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Tue, 14 May 2024 11:21:37 +0400 Subject: [PATCH 3/4] group related works --- .../peachjam/_related_documents.html | 55 ++++++++++--------- peachjam/views/generic_views.py | 44 +++++++++++---- 2 files changed, 61 insertions(+), 38 deletions(-) diff --git a/peachjam/templates/peachjam/_related_documents.html b/peachjam/templates/peachjam/_related_documents.html index 96c631566..52b878c77 100644 --- a/peachjam/templates/peachjam/_related_documents.html +++ b/peachjam/templates/peachjam/_related_documents.html @@ -4,34 +4,37 @@

    {% trans "Related documents" %}

    - {% if relationships_as_subject %} -
    -
      - {% for rel in relationships_as_subject %} - {% if rel.object_work %} -
    • - {% translate rel.predicate.verb as verb %} - {{ verb|capfirst }} - {{ rel.object_work.title }} -
    • - {% endif %} - {% endfor %} -
    + {% if relationships_as_object %} +
    + {% for verb, rels in relationships_as_object %} +

    {% translate verb as verb %}{{ verb|capfirst }}

    +
      + {% for rel in rels %} + {% if rel.subject_work %} +
    1. + {% translate rel.predicate.reverse_verb as verb %} + {{ rel.subject_work.title }} +
    2. + {% endif %} + {% endfor %} +
    + {% endfor %}
    {% endif %} - {% if relationships_as_object %} -
    -
      - {% for rel in relationships_as_object %} - {% if rel.subject_work %} -
    • - {% translate rel.predicate.reverse_verb as verb %} - {{ verb|capfirst }} - {{ rel.subject_work.title }} -
    • - {% endif %} - {% endfor %} -
    + {% if relationships_as_subject %} +
    + {% for verb, rels in relationships_as_subject %} +

    {% translate verb as verb %}{{ verb|capfirst }}

    +
      + {% for rel in rels %} + {% if rel.object_work %} +
    1. + {{ rel.object_work.title }} +
    2. + {% endif %} + {% endfor %} +
    + {% endfor %}
    {% endif %}
    diff --git a/peachjam/views/generic_views.py b/peachjam/views/generic_views.py index 102b1404c..f2f2461ac 100644 --- a/peachjam/views/generic_views.py +++ b/peachjam/views/generic_views.py @@ -216,22 +216,42 @@ def fetch_citation_docs(self, works): return result def add_relationships(self, context): - relationships_as_subject = list( - Relationship.for_subject_document(context["document"]) - .filter(object_work__documents__isnull=False) - .distinct("pk") + # sort and group by predicate + rels_as_subject = sorted( + list( + Relationship.for_subject_document(context["document"]) + .filter(object_work__documents__isnull=False) + .distinct("pk") + ), + key=lambda r: [r.predicate.verb, r.object_work.title], ) + rels_as_subject = [ + (verb, list(group)) + for verb, group in itertools.groupby( + rels_as_subject, lambda r: r.predicate.verb + ) + ] - relationships_as_object = list( - Relationship.for_object_document(context["document"]) - .filter(subject_work__documents__isnull=False) - .distinct("pk") + # sort and group by predicate + rels_as_object = sorted( + list( + Relationship.for_object_document(context["document"]) + .filter(subject_work__documents__isnull=False) + .distinct("pk") + ), + key=lambda r: [r.predicate.reverse_verb, r.subject_work.title], ) + rels_as_object = [ + (verb, list(group)) + for verb, group in itertools.groupby( + rels_as_object, lambda r: r.predicate.reverse_verb + ) + ] - context["relationships_as_subject"] = relationships_as_subject - context["relationships_as_object"] = relationships_as_object - context["n_relationships"] = len(relationships_as_subject) + len( - relationships_as_object + context["relationships_as_subject"] = rels_as_subject + context["relationships_as_object"] = rels_as_object + context["n_relationships"] = sum( + len(g) for v, g in itertools.chain(rels_as_object, rels_as_subject) ) context["relationship_limit"] = 4 From 712333c4214f24cbf6009053b77962cf8d77c1fb Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Tue, 14 May 2024 16:10:01 +0200 Subject: [PATCH 4/4] fix relationships on main tab --- .../peachjam/layouts/document_detail.html | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/peachjam/templates/peachjam/layouts/document_detail.html b/peachjam/templates/peachjam/layouts/document_detail.html index 7c10d6422..06a888356 100644 --- a/peachjam/templates/peachjam/layouts/document_detail.html +++ b/peachjam/templates/peachjam/layouts/document_detail.html @@ -378,23 +378,27 @@
    {% blocktrans %}{{ n_relationships }} related documents{% endblocktrans %} {% else %} - {% for rel in relationships_as_subject %} - {% if rel.object_work %} -
  5. - {% translate rel.predicate.verb as verb %} - {{ verb|capfirst }} - {{ rel.object_work.title }} -
  6. - {% endif %} + {% for verb, rels in relationships_as_subject %} + {% for rel in rels %} + {% if rel.object_work %} +
  7. + {% translate rel.predicate.verb as verb %} + {{ verb|capfirst }} + {{ rel.object_work.title }} +
  8. + {% endif %} + {% endfor %} {% endfor %} - {% for rel in relationships_as_object %} - {% if rel.subject_work %} -
  9. - {% translate rel.predicate.reverse_verb as verb %} - {{ verb|capfirst }} - {{ rel.subject_work.title }} -
  10. - {% endif %} + {% for verb, rels in relationships_as_object %} + {% for rel in rels %} + {% if rel.subject_work %} +
  11. + {% translate rel.predicate.reverse_verb as verb %} + {{ verb|capfirst }} + {{ rel.subject_work.title }} +
  12. + {% endif %} + {% endfor %} {% endfor %} {% endif %}