From c1fb7e495253ea69ca2de81b2e59a2495696f5cd Mon Sep 17 00:00:00 2001 From: Pablo Tamarit Date: Tue, 17 Dec 2024 09:50:20 +0100 Subject: [PATCH] serializers: bibtex: year and month using publication date --- .../resources/serializers/bibtex/schema.py | 44 ++++++++++++++----- .../serializers/test_bibtex_serializer.py | 6 ++- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/invenio_rdm_records/resources/serializers/bibtex/schema.py b/invenio_rdm_records/resources/serializers/bibtex/schema.py index e80f2aa28..e9c4ed258 100644 --- a/invenio_rdm_records/resources/serializers/bibtex/schema.py +++ b/invenio_rdm_records/resources/serializers/bibtex/schema.py @@ -7,11 +7,15 @@ """BibTex based Schema for Invenio RDM Records.""" -import datetime +import calendar import textwrap +from babel_edtf import parse_edtf +from edtf.parser.grammar import ParseException +from edtf.parser.parser_classes import Date, Interval from flask_resources.serializers import BaseSerializerSchema from marshmallow import fields, post_dump +from pydash import py_ from slugify import slugify from ..schemas import CommonFieldsMixin @@ -24,7 +28,7 @@ class BibTexSchema(BaseSerializerSchema, CommonFieldsMixin): id = fields.Str() resource_id = fields.Str(attribute="metadata.resource_type.id") version = fields.Str(attribute="metadata.version") - date_created = fields.Method("get_date_created") + date_published = fields.Method("get_date_published") locations = fields.Method("get_locations") titles = fields.Method("get_titles") doi = fields.Method("get_doi") @@ -71,13 +75,31 @@ def default_entry_type(self): """ return BibTexFormatter.misc - def get_date_created(self, obj): - """Get date last updated.""" - date_obj = datetime.datetime.fromisoformat(obj["created"]) + def get_date_published(self, obj): + """Get publication year and month from edtf date.""" + publication_date = py_.get(obj, "metadata.publication_date") + if not publication_date: + return None - month = date_obj.strftime("%b").lower() - year = date_obj.strftime("%Y") - return {"month": month, "year": year} + try: + parsed_date = parse_edtf(publication_date) + except ParseException: + return None + + if isinstance(parsed_date, Interval): + # if date is an interval, use the start date + parsed_date = parsed_date.lower + elif not isinstance(parsed_date, Date): + return None + + date_published = {"year": parsed_date.year} + if parsed_date.month: + month_three_letter_abbr = calendar.month_abbr[ + int(parsed_date.month) + ].lower() + date_published["month"] = month_three_letter_abbr + + return date_published def get_creator(self, obj): """Get creator.""" @@ -232,9 +254,9 @@ def _fetch_fields_map(self, data): "title": (lambda titles: None if titles is None else titles[0])( data.get("titles", None) ), - "year": data.get("date_created", {}).get("year", None), + "year": data.get("date_published", {}).get("year", None), "doi": data.get("doi", None), - "month": data.get("date_created", {}).get("month", None), + "month": data.get("date_published", {}).get("month", None), "version": data.get("version", None), "url": data.get("url", None), "school": data.get("school", None), @@ -287,7 +309,7 @@ def _get_citation_key(self, data, original_data): creator = creators[0].get("person_or_org", {}) name = creator.get("family_name", creator["name"]) - pubdate = data.get("date_created", {}).get("year", None) + pubdate = data.get("date_published", {}).get("year", None) year = id if pubdate is not None: year = "{}_{}".format(pubdate, id) diff --git a/tests/resources/serializers/test_bibtex_serializer.py b/tests/resources/serializers/test_bibtex_serializer.py index 5d2d54610..d42d5c2d0 100644 --- a/tests/resources/serializers/test_bibtex_serializer.py +++ b/tests/resources/serializers/test_bibtex_serializer.py @@ -16,7 +16,8 @@ def updated_minimal_record(minimal_record): """Update fields (done after record create) for BibTex serializer.""" minimal_record["access"]["status"] = "open" - minimal_record["created"] = "2023-03-09T00:00:00.000000+00:00" + minimal_record["metadata"]["publication_date"] = "2023-03-13" + minimal_record["created"] = "2024-12-17T00:00:00.000000+00:00" minimal_record["id"] = "abcde-fghij" for creator in minimal_record["metadata"]["creators"]: @@ -31,7 +32,8 @@ def updated_minimal_record(minimal_record): def updated_full_record(full_record_to_dict): """Update fields (done after record create) for BibTex serializer.""" full_record_to_dict["access"]["status"] = "embargoed" - full_record_to_dict["created"] = "2023-03-23T00:00:00.000000+00:00" + full_record_to_dict["metadata"]["publication_date"] = "2023-03/2024-02" + full_record_to_dict["created"] = "2024-12-17T00:00:00.000000+00:00" full_record_to_dict["id"] = "abcde-fghij" full_record_to_dict["metadata"]["resource_type"]["id"] = "other"