From 4175b82776eb50fe66791bee1d24750125793214 Mon Sep 17 00:00:00 2001 From: Ewan Cahen Date: Fri, 13 Sep 2024 16:07:23 +0200 Subject: [PATCH] feat: allow harvesting citations of OpenAlex reference papers --- database/011-create-mention-table.sql | 5 +- database/104-software-views.sql | 10 +- docker-compose.yml | 6 +- .../docs/01-users/05-adding-software.md | 8 +- .../docs/01-users/07-adding-projects.md | 10 +- .../docs/03-rsd-instance/03-administration.md | 2 +- .../admin/mentions/MentionsOverview.tsx | 2 +- .../components/mention/EditMentionModal.tsx | 11 +- frontend/components/mention/config.ts | 12 +- frontend/package.json | 2 +- frontend/package.json.license | 2 +- frontend/types/Mention.ts | 4 +- .../nl/esciencecenter/rsd/scraper/Utils.java | 17 +- .../rsd/scraper/doi/CitationData.java | 41 ++-- .../rsd/scraper/doi/CrossrefMention.java | 68 +++--- .../doi/DataCiteReleaseRepository.java | 25 ++- .../doi/DataciteMentionRepository.java | 95 ++++---- .../esciencecenter/rsd/scraper/doi/Doi.java | 63 ++++++ .../scraper/doi/ExternalMentionRecord.java | 26 +++ .../rsd/scraper/doi/MainCitations.java | 61 ++--- .../rsd/scraper/doi/MainMentions.java | 126 +++++++---- .../rsd/scraper/doi/MainReleases.java | 46 ++-- .../rsd/scraper/doi/Mention.java | 15 -- .../rsd/scraper/doi/MentionRecord.java | 52 ----- .../rsd/scraper/doi/MentionRepository.java | 17 -- .../rsd/scraper/doi/OpenAlexCitations.java | 140 +++++++----- .../rsd/scraper/doi/OpenalexId.java | 72 ++++++ .../doi/PostgrestCitationRepository.java | 45 ++-- .../doi/PostgrestMentionRepository.java | 211 +++++++++--------- .../doi/PostgrestReleaseRepository.java | 35 +-- .../rsd/scraper/doi/ReleaseData.java | 8 +- .../rsd/scraper/doi/RsdMentionIds.java | 15 ++ .../rsd/scraper/doi/RsdMentionRecord.java | 16 ++ .../doi/DataciteMentionRepositoryTest.java | 14 +- .../rsd/scraper/doi/DoiTest.java | 54 +++++ .../rsd/scraper/doi/MainMentionsTest.java | 6 +- .../rsd/scraper/doi/OpenalexIdTest.java | 49 ++++ 37 files changed, 849 insertions(+), 542 deletions(-) create mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/Doi.java create mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/ExternalMentionRecord.java delete mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/Mention.java delete mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRecord.java delete mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRepository.java create mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenalexId.java create mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/RsdMentionIds.java create mode 100644 scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/RsdMentionRecord.java create mode 100644 scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/DoiTest.java create mode 100644 scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/OpenalexIdTest.java diff --git a/database/011-create-mention-table.sql b/database/011-create-mention-table.sql index 817b96ea6..146effeef 100644 --- a/database/011-create-mention-table.sql +++ b/database/011-create-mention-table.sql @@ -31,6 +31,7 @@ CREATE TABLE mention ( id UUID DEFAULT gen_random_uuid() PRIMARY KEY, doi CITEXT UNIQUE CHECK (doi ~ '^10(\.\w+)+/\S+$' AND LENGTH(doi) <= 255), doi_registration_date TIMESTAMPTZ, + openalex_id CITEXT UNIQUE CHECK (openalex_id ~ '^https://openalex\.org/[WwAaSsIiCcPpFf]\d{3,13}$'), url VARCHAR(500) CHECK (url ~ '^https?://'), title VARCHAR(3000) 
NOT NULL, authors VARCHAR(50000), @@ -40,15 +41,13 @@ CREATE TABLE mention ( page VARCHAR(50), image_url VARCHAR(500) CHECK (image_url ~ '^https?://'), mention_type mention_type NOT NULL, - external_id VARCHAR(500), source VARCHAR(50) NOT NULL, version VARCHAR(100), note VARCHAR(500), scraped_at TIMESTAMPTZ, citations_scraped_at TIMESTAMPTZ, created_at TIMESTAMPTZ NOT NULL, - updated_at TIMESTAMPTZ NOT NULL, - UNIQUE(external_id, source) + updated_at TIMESTAMPTZ NOT NULL ); CREATE FUNCTION sanitise_insert_mention() RETURNS TRIGGER LANGUAGE plpgsql AS diff --git a/database/104-software-views.sql b/database/104-software-views.sql index ffaee20b3..6380a0b33 100644 --- a/database/104-software-views.sql +++ b/database/104-software-views.sql @@ -56,18 +56,20 @@ CREATE FUNCTION reference_papers_to_scrape() RETURNS TABLE ( id UUID, doi CITEXT, + openalex_id CITEXT, citations_scraped_at TIMESTAMPTZ, - known_dois CITEXT[] + known_citing_dois CITEXT[] ) LANGUAGE sql STABLE AS $$ - SELECT mention.id, mention.doi, mention.citations_scraped_at, ARRAY_REMOVE(ARRAY_AGG(citation.doi), NULL) + SELECT mention.id, mention.doi, mention.openalex_id, mention.citations_scraped_at, ARRAY_REMOVE(ARRAY_AGG(citation.doi), NULL) FROM mention LEFT JOIN citation_for_mention ON mention.id = citation_for_mention.mention LEFT JOIN mention AS citation ON citation_for_mention.citation = citation.id WHERE - -- ONLY items with DOI - mention.doi IS NOT NULL AND ( + -- ONLY items with DOI or OpenAlex id + (mention.doi IS NOT NULL OR mention.openalex_id IS NOT NULL) + AND ( mention.id IN ( SELECT mention FROM reference_paper_for_software ) diff --git a/docker-compose.yml b/docker-compose.yml index 5d6d5dd17..dfe8b64ac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,7 +16,7 @@ version: "3.0" services: database: build: ./database - image: rsd/database:2.4.1 + image: rsd/database:2.5.0 ports: # enable connection from outside (development mode) - "5432:5432" @@ -110,7 +110,7 @@ services: # dockerfile to use for build dockerfile: Dockerfile # update version number to correspond to frontend/package.json - image: rsd/frontend:2.19.0 + image: rsd/frontend:2.19.1 environment: # it uses values from .env file - POSTGREST_URL @@ -158,7 +158,7 @@ services: scrapers: build: ./scrapers - image: rsd/scrapers:1.8.1 + image: rsd/scrapers:1.9.0 environment: # it uses values from .env file - POSTGREST_URL diff --git a/documentation/docs/01-users/05-adding-software.md b/documentation/docs/01-users/05-adding-software.md index 02ace22ad..35c289c4e 100644 --- a/documentation/docs/01-users/05-adding-software.md +++ b/documentation/docs/01-users/05-adding-software.md @@ -75,7 +75,7 @@ When using a Document URL to point to a remote Markdown file on the GitHub, you ### Logo -The software logo is shown on the software page and in the software card (see example below). **You can upload an image up to 2MB of size**. Widely used image formats like jpg, jpeg, png, svg etc. are supported. Use the **svg** format, if possible, because it scales better than other formats. +The software logo is shown on the software page and in the software card (see example below). **You can upload an image up to 2MB of size**. Widely used image formats like JPG, JPEG, PNG, SVG etc. are supported. Use the **svg** format, if possible, because it scales better than other formats. ![image](img/software-logo-card.webp) @@ -164,14 +164,14 @@ This section allows you to add mentions to your software page. 
You can use this ### Reference papers -Use the *Search* box on the right hand side to find papers by DOI or title. All the relevant data about the publication will be retrieved automatically. A background scraper will use [OpenAlex](https://openalex.org/) to collect all citations of the reference papers. +Use the *Search* box on the right hand side to find papers by DOI or title. All the relevant data about the publication will be retrieved automatically. A background scraper will use [OpenAlex](https://openalex.org/) to collect all citations of reference papers that have a DOI or an OpenAlex ID. ### Citations -All the results RSD scraper was able to find on [OpenAlex](https://openalex.org/) citing provided reference papers. It can take a few minutes before the citations are harvested. +These are the citations of the reference papers that the RSD scraper was able to find on [OpenAlex](https://openalex.org/). It can take a few minutes before the citations are harvested. :::warning -You cannot edit this section. All entries are automatically generated by the RSD scraper service. The found mentions are displayed in the mentions section of the software page. +You cannot edit the content of this section. All entries are automatically harvested and generated by the RSD scraper. The mentions found are displayed in the mentions section of the software page. ::: ### Related output diff --git a/documentation/docs/01-users/07-adding-projects.md b/documentation/docs/01-users/07-adding-projects.md index bf0eff51a..41b887e62 100644 --- a/documentation/docs/01-users/07-adding-projects.md +++ b/documentation/docs/01-users/07-adding-projects.md @@ -8,7 +8,7 @@ After signing in, use the **"+"** button next to your avatar icon on the top rig ![image](img/new-project.gif) -The RSD will automatically generate a *slug* for your project based on the project name you have provided. This slug will become part of the URL on which your project page can found. +The RSD will automatically generate a *slug* for your project based on the project name you have provided. This slug will become part of the URL on which your project page can be found. There is a small chance the generated slug is already in use by another project. If this is the case, an error will be shown, and you will need to change the slug manually to resolve this conflict. Once you click **"save"**, the RSD will initialize a new empty project page. This page will not be public yet to give you some time to provide additional information. Next, you can add additional information in the edit sections explained below. @@ -106,16 +106,16 @@ You can import up to 50 publications by providing a list of DOIs, one per line. If the output has no DOI you can create new mention item manually. Each manually added item should at least have a **Title**, **Type** and **URL**. All other fields are optional. The **Note** field can be used to add a note to this item, and will not be shown on the project page. :::warning -Please check if the information is complete and correct. A manual item can not be edited after it has been saved! +Please check if the information is complete and correct. A manual item cannot be edited after it has been saved! You can, however, delete an item and create a new one. ::: ### Citations -Here we list all the citations of your output that the RSD was able to find automatically by using the DOIs of your output and OpenAlex. On the project page these citations are shown in the impact section together with the items you added manually.
+Here, we list all the citations of your output (that has a DOI or OpenAlex ID) that the RSD was able to find automatically on OpenAlex. On the project page, these citations are shown in the impact section together with the items you added manually. :::warning -You cannot edit this section. All entries are automatically generated by the RSD scraper service. Found publications are displayed in the impact section of the project page. +You cannot edit the content of this section. All entries are automatically harvested and generated by the RSD scraper. The publications found are displayed in the impact section of the project page. ::: ### Impact @@ -135,7 +135,7 @@ You can import up to 50 publications by providing a list of DOIs, one per line. If the publication has no DOI you can create a new item manually. Each manually added item should at least have a **Title**, **Type** and **URL**. All other fields are optional. The **Note** field can be used to add a note to this item, and will not be shown on the project page. :::warning -Please check if the information is complete and correct. A manual item can not be edited after it has been saved! You can, however, delete an item and create a new one. +Please check if the information is complete and correct. A manual item cannot be edited after it has been saved! You can, however, delete an item and create a new one. ::: ## Related projects diff --git a/documentation/docs/03-rsd-instance/03-administration.md b/documentation/docs/03-rsd-instance/03-administration.md index fd9afbfb8..912f4ddb2 100644 --- a/documentation/docs/03-rsd-instance/03-administration.md +++ b/documentation/docs/03-rsd-instance/03-administration.md @@ -187,7 +187,7 @@ fill the `provenance_iri` column. Further read [Linked Data](https://en.wikipedi ## Mentions -In this section, admins can search for mentions and edit them. If you enter a DOI or UUID, we search on that field only. Otherwise, we search on title, authors, journal, URL, note and external ID (like an OpenAlex ID). +In this section, admins can search for mentions and edit them. If you enter a DOI or UUID, we search on that field only. Otherwise, we search on title, authors, journal, URL, note and OpenAlex ID. :::warning Edit mentions with care: they might be referenced to in multiple places. If you want to fully change a mention attached to e.g. a software page, you should delete it there and create a new one instead of editing it. 
diff --git a/frontend/components/admin/mentions/MentionsOverview.tsx b/frontend/components/admin/mentions/MentionsOverview.tsx index d34e92bf3..43055110e 100644 --- a/frontend/components/admin/mentions/MentionsOverview.tsx +++ b/frontend/components/admin/mentions/MentionsOverview.tsx @@ -52,7 +52,7 @@ export default function MentionsOverview() { if (searchTypeTerm.type === 'doi') { return `doi=eq.${termEscaped}` } - return `or=(title.ilike.*${termEscaped}*,authors.ilike.*${termEscaped}*,journal.ilike.*${termEscaped}*,url.ilike.*${termEscaped}*,note.ilike.*${termEscaped}*,external_id.ilike.*${termEscaped}*)` + return `or=(title.ilike.*${termEscaped}*,authors.ilike.*${termEscaped}*,journal.ilike.*${termEscaped}*,url.ilike.*${termEscaped}*,note.ilike.*${termEscaped}*,openalex_id.ilike.*${termEscaped}*)` } function sanitiseSearch(search: string): string | undefined { diff --git a/frontend/components/mention/EditMentionModal.tsx b/frontend/components/mention/EditMentionModal.tsx index 9370a5a82..c4d61d493 100644 --- a/frontend/components/mention/EditMentionModal.tsx +++ b/frontend/components/mention/EditMentionModal.tsx @@ -291,14 +291,13 @@ export default function EditMentionModal({open, onCancel, onSubmit, item, pos, t
diff --git a/frontend/components/mention/config.ts b/frontend/components/mention/config.ts index 65fec1038..514c9cf45 100644 --- a/frontend/components/mention/config.ts +++ b/frontend/components/mention/config.ts @@ -135,14 +135,14 @@ export const mentionModal = { } } }, - external_id: { - label: 'External ID', - help: 'An ID used by e.g. OpenAlex', + openalex_id: { + label: 'OpenAlex ID', + help: 'The OpenAlex ID', validation: { required: false, - maxLength: { - value: 500, - message: 'Maximum length is 500' + pattern: { + value: /^https:\/\/openalex\.org\/[WwAaSsIiCcPpFf]\d{3,13}$/, + message: 'e.g. https://openalex.org/W3160330321' } } }, diff --git a/frontend/package.json b/frontend/package.json index f68a136e1..5e675d60b 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "rsd-frontend", - "version": "2.19.0", + "version": "2.19.1", "private": true, "scripts": { "dev": "next dev", diff --git a/frontend/package.json.license b/frontend/package.json.license index 7bc0d9869..eb63f95f2 100644 --- a/frontend/package.json.license +++ b/frontend/package.json.license @@ -3,7 +3,7 @@ SPDX-FileCopyrightText: 2021 - 2023 dv4all SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center SPDX-FileCopyrightText: 2022 Jesús García Gonzalez (Netherlands eScience Center) SPDX-FileCopyrightText: 2023 - 2024 Dusan Mijatovic (Netherlands eScience Center) -SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) +SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) SPDX-License-Identifier: Apache-2.0 SPDX-License-Identifier: CC-BY-4.0 diff --git a/frontend/types/Mention.ts b/frontend/types/Mention.ts index 0b1751c79..63779e630 100644 --- a/frontend/types/Mention.ts +++ b/frontend/types/Mention.ts @@ -29,10 +29,10 @@ export type MentionItemProps = { mention_type: MentionTypeKeys | null source: string note?: string | null - external_id?: string | null + openalex_id?: string | null } -export const mentionColumns ='id,doi,url,title,authors,publisher,publication_year,journal,page,image_url,mention_type,source,note' +export const mentionColumns ='id,doi,openalex_id,url,title,authors,publisher,publication_year,journal,page,image_url,mention_type,source,note' export type MentionByType = { [key in MentionTypeKeys]?: MentionItemProps[] diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/Utils.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/Utils.java index 60c9342ae..27f249ea2 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/Utils.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/Utils.java @@ -127,7 +127,7 @@ public static String getAsAdmin(String uri) { Thread.currentThread().interrupt(); throw new RuntimeException(e); } catch (IOException e) { - LOGGER.warn("An error occurred sending a request to {}", uri, e); + LOGGER.error("An error occurred sending a request to {}", uri, e); throw new RuntimeException(e); } @@ -188,7 +188,9 @@ public static String postAsAdmin(String uri, String json, String... 
extraHeaders .timeout(DEFAULT_TIMEOUT) .header("Content-Type", "application/json") .header("Authorization", "Bearer " + jwtString); - if (extraHeaders != null && extraHeaders.length > 0) builder.headers(extraHeaders); + if (extraHeaders != null && extraHeaders.length > 0) { + builder.headers(extraHeaders); + } HttpRequest request = builder.build(); HttpResponse response; @@ -276,15 +278,18 @@ static String createPatchUri(String baseuri, String tableName, String primaryKey return "%s/%s?%s=eq.%s".formatted(baseuri, tableName, primaryKeyName, primaryKey); } - public static String patchAsAdmin(String uri, String json) { + public static String patchAsAdmin(String uri, String json, String... extraHeaders) { String jwtString = adminJwt(); - HttpRequest request = HttpRequest.newBuilder() + HttpRequest.Builder builder = HttpRequest.newBuilder() .method("PATCH", HttpRequest.BodyPublishers.ofString(json)) .uri(URI.create(uri)) .timeout(Duration.ofSeconds(30)) .header("Content-Type", "application/json") - .header("Authorization", "Bearer " + jwtString) - .build(); + .header("Authorization", "Bearer " + jwtString); + if (extraHeaders != null && extraHeaders.length > 0) { + builder.headers(extraHeaders); + } + HttpRequest request = builder.build(); HttpResponse response; try (HttpClient client = HttpClient.newHttpClient()) { response = client.send(request, HttpResponse.BodyHandlers.ofString()); diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CitationData.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CitationData.java index 94bd1ae79..3af97d419 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CitationData.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CitationData.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -8,31 +8,20 @@ import java.util.Collection; import java.util.UUID; -/** +/** * Container class for Citation information retrieved from the database. */ -public class CitationData { +public record CitationData( + // UUID of this entry in the database + UUID id, - // UUID of this entry in the database - public final UUID id; - - // DOI of this entry. - public final String doi; - - // List of known DOIs citing this entry. - public final Collection knownDois; - - /** - * Create a CitationData and initialize with data provided. - * - * @param id of this entry in the database - * @param doi of this entry - * @param knownDois list of known DOIs citing this entry - */ - public CitationData(UUID id, String doi, Collection knownDois) { - super(); - this.id = id; - this.doi = doi; - this.knownDois = knownDois; - } + // DOI of this entry. + Doi doi, + + // OpenAlex ID of this entry + OpenalexId openalexId, + + // List of known DOIs citing this entry. 
+ Collection knownDois +) { } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CrossrefMention.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CrossrefMention.java index 5dc6fe905..67e0a51bb 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CrossrefMention.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/CrossrefMention.java @@ -16,14 +16,13 @@ import java.io.IOException; import java.net.URI; -import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Map; import java.util.Objects; -public class CrossrefMention implements Mention { +public class CrossrefMention { static final Map crossrefTypeMap; @@ -62,26 +61,24 @@ public class CrossrefMention implements Mention { crossrefTypeMap.put("standard-series", MentionType.other); } - private final String doi; + private final Doi doi; - public CrossrefMention(String doi) { + public CrossrefMention(Doi doi) { this.doi = Objects.requireNonNull(doi); } - @Override - public MentionRecord mentionData() throws IOException, InterruptedException, RsdResponseException { - StringBuilder url = new StringBuilder("https://api.crossref.org/works/" + Utils.urlEncode(doi)); - Config.crossrefContactEmail().ifPresent(email -> url.append("?mailto=").append(email)); - String responseJson = Utils.get(url.toString()); + public ExternalMentionRecord mentionData() throws IOException, InterruptedException, RsdResponseException { + StringBuilder crossrefUrlBuilder = new StringBuilder("https://api.crossref.org/works/" + Utils.urlEncode(doi.toString())); + Config.crossrefContactEmail().ifPresent(email -> crossrefUrlBuilder.append("?mailto=").append(email)); + String responseJson = Utils.get(crossrefUrlBuilder.toString()); JsonObject jsonTree = JsonParser.parseString(responseJson).getAsJsonObject(); - MentionRecord result = new MentionRecord(); JsonObject workJson = jsonTree.getAsJsonObject("message"); - result.doi = doi; - result.url = URI.create("https://doi.org/" + Utils.urlEncode(result.doi)); - result.title = workJson.getAsJsonArray("title").get(0).getAsString(); + URI mentionUrl = URI.create("https://doi.org/" + Utils.urlEncode(this.doi.toString())); + String title = workJson.getAsJsonArray("title").get(0).getAsString(); - Collection authors = new ArrayList<>(); + Collection authorsBuilder = new ArrayList<>(); + String authors = null; Iterable authorsJson = (Iterable) workJson.getAsJsonArray("author"); if (authorsJson != null) { for (JsonObject authorJson : authorsJson) { @@ -89,36 +86,51 @@ public MentionRecord mentionData() throws IOException, InterruptedException, Rsd String familyName = Utils.stringOrNull(authorJson.get("family")); String name = Utils.stringOrNull(authorJson.get("name")); if (givenName != null && familyName != null) { - authors.add(givenName + " " + familyName); + authorsBuilder.add(givenName + " " + familyName); } else if (name != null) { - authors.add(name); + authorsBuilder.add(name); } else if (givenName != null) { - authors.add(givenName); + authorsBuilder.add(givenName); } else if (familyName != null) { - authors.add(familyName); + authorsBuilder.add(familyName); } } - result.authors = String.join(", ", authors); + authors = String.join(", ", authorsBuilder); } - result.publisher = Utils.stringOrNull(workJson.get("publisher")); + String publisher = Utils.stringOrNull(workJson.get("publisher")); + Integer publicationYear = null; try { - result.publicationYear = 
Utils.integerOrNull(workJson.getAsJsonObject("published").getAsJsonArray("date-parts").get(0).getAsJsonArray().get(0)); + publicationYear = Utils.integerOrNull(workJson.getAsJsonObject("published").getAsJsonArray("date-parts").get(0).getAsJsonArray().get(0)); } catch (RuntimeException e) { // year not found, we leave it at null, nothing to do } + String journal = null; if (!workJson.getAsJsonArray("container-title").isEmpty()) { JsonArray journalTitles = workJson.getAsJsonArray("container-title"); - result.journal = journalTitles.get(0).getAsString(); + StringBuilder journalBuilder = new StringBuilder(journalTitles.get(0).getAsString()); for (int i = 1; i < journalTitles.size(); i++) { - result.journal += ", " + journalTitles.get(i).getAsString(); + journalBuilder.append(", ").append(journalTitles.get(i).getAsString()); } + journal = journalBuilder.toString(); } - result.page = Utils.stringOrNull(workJson.get("page")); - result.mentionType = crossrefTypeMap.getOrDefault(Utils.stringOrNull(workJson.get("type")), MentionType.other); - result.source = "Crossref"; - result.scrapedAt = Instant.now(); + String page = Utils.stringOrNull(workJson.get("page")); + MentionType mentionType = crossrefTypeMap.getOrDefault(Utils.stringOrNull(workJson.get("type")), MentionType.other); - return result; + return new ExternalMentionRecord( + this.doi, + null, + null, + mentionUrl, + title, + authors, + publisher, + publicationYear, + journal, + page, + mentionType, + "Crossref", + null + ); } } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataCiteReleaseRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataCiteReleaseRepository.java index a5f64ba65..b50a46791 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataCiteReleaseRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataCiteReleaseRepository.java @@ -16,13 +16,13 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.Map; -import java.util.TreeMap; public class DataCiteReleaseRepository { private static final Logger LOGGER = LoggerFactory.getLogger(DataCiteReleaseRepository.class); - + private static final String QUERY_UNFORMATTED = """ query { works(ids: [%s], first: 10000) { @@ -39,35 +39,36 @@ public class DataCiteReleaseRepository { } """; - public Map> getVersionedDois(Collection conceptDois) { + public Map> getVersionedDois(Collection conceptDois) { if (conceptDois.isEmpty()) { return Collections.emptyMap(); } - String query = QUERY_UNFORMATTED.formatted(DataciteMentionRepository.joinCollection(conceptDois)); + String query = QUERY_UNFORMATTED.formatted(DataciteMentionRepository.joinDoisForGraphqlQuery(conceptDois)); JsonObject body = new JsonObject(); body.addProperty("query", query); String responseJson = Utils.post("https://api.datacite.org/graphql", body.toString(), "Content-Type", "application/json"); return parseJson(responseJson); } - Map> parseJson(String json) { + Map> parseJson(String json) { DataciteMentionRepository dataciteMentionRepository = new DataciteMentionRepository(); JsonObject root = JsonParser.parseString(json).getAsJsonObject(); JsonArray worksJson = root.getAsJsonObject("data").getAsJsonObject("works").getAsJsonArray("nodes"); - Map> releasesPerConceptDoi = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); + Map> releasesPerConceptDoi = new HashMap<>(); for (JsonElement work : worksJson) { try { JsonObject workObject = work.getAsJsonObject(); - String 
conceptDoi = workObject.getAsJsonPrimitive("doi").getAsString(); + String conceptDoiString = workObject.getAsJsonPrimitive("doi").getAsString(); + Doi conceptDoi = Doi.fromString(conceptDoiString); Integer versionOfCount = Utils.integerOrNull(workObject.get("versionOfCount")); if (versionOfCount == null || versionOfCount.intValue() != 0) { - LOGGER.debug("{} is not a concept DOI", conceptDoi); + LOGGER.debug("{} is not a concept DOI", conceptDoiString); continue; } - Collection versionDois = new ArrayList<>(); + Collection versionDois = new ArrayList<>(); JsonArray relatedIdentifiers = workObject.getAsJsonArray("relatedIdentifiers"); for (JsonElement relatedIdentifier : relatedIdentifiers) { JsonObject relatedIdentifierObject = relatedIdentifier.getAsJsonObject(); @@ -78,13 +79,13 @@ Map> parseJson(String json) { if (relatedIdentifierType == null || !relatedIdentifierType.equals("DOI")) continue; String relatedIdentifierDoi = relatedIdentifierObject.getAsJsonPrimitive("relatedIdentifier").getAsString(); - versionDois.add(relatedIdentifierDoi); + versionDois.add(Doi.fromString(relatedIdentifierDoi)); } - Collection versionedMentions = dataciteMentionRepository.mentionData(versionDois); + Collection versionedMentions = dataciteMentionRepository.mentionData(versionDois); releasesPerConceptDoi.put(conceptDoi, versionedMentions); } catch (RuntimeException e) { - LOGGER.warn("Failed to scrape a DataCite mention with data {}, ", work, e); + LOGGER.error("Failed to scrape a DataCite mention with data {}, ", work, e); } } return releasesPerConceptDoi; diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java index 4ff06b198..cd200411a 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java @@ -14,23 +14,22 @@ import org.slf4j.LoggerFactory; import java.net.URI; -import java.time.Instant; import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Set; -import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; -public class DataciteMentionRepository implements MentionRepository { +public class DataciteMentionRepository { private static final Logger LOGGER = LoggerFactory.getLogger(DataciteMentionRepository.class); - + private static final String QUERY_UNFORMATTED = """ query { works(ids: [%s], first: 10000) { @@ -111,69 +110,70 @@ public class DataciteMentionRepository implements MentionRepository { } // "10.5281/zenodo.1408128","10.1186/s12859-018-2165-7" - static String joinCollection(Collection dois) { + static String joinDoisForGraphqlQuery(Collection dois) { return dois.stream() + .map(Doi::toString) .collect(Collectors.joining("\",\"", "\"", "\"")); } - static Collection jsonStringToUniqueMentions(String json) { + static Collection jsonStringToUniqueMentions(String json) { JsonObject root = JsonParser.parseString(json).getAsJsonObject(); JsonArray worksJson = root.getAsJsonObject("data").getAsJsonObject("works").getAsJsonArray("nodes"); - Collection mentions = new ArrayList<>(); - Set usedDois = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + Collection mentions = new ArrayList<>(); + Set usedDois = new 
HashSet<>(); for (JsonElement work : worksJson) { try { // Sometimes, DataCite gives back two of the same results for one DOI, e.g. for 10.4122/1.1000000817, // so we need to only add it once, otherwise we cannot POST it to the backend - MentionRecord parsedMention = parseWork(work.getAsJsonObject()); - if (usedDois.contains(parsedMention.doi)) continue; + ExternalMentionRecord parsedMention = parseWork(work.getAsJsonObject()); + if (usedDois.contains(parsedMention.doi())) continue; - usedDois.add(parsedMention.doi); + usedDois.add(parsedMention.doi()); mentions.add(parsedMention); } catch (RuntimeException e) { - // TODO: fix exeption type - LOGGER.warn("Failed to scrape a DataCite mention with data {} ", work, e); + // TODO: fix exception type + LOGGER.error("Failed to scrape a DataCite mention with data {} ", work, e); } } return mentions; } - static MentionRecord parseWork(JsonObject work) { - MentionRecord result = new MentionRecord(); - result.doi = work.get("doi").getAsString(); - result.url = URI.create("https://doi.org/" + Utils.urlEncode(result.doi)); - result.title = work.getAsJsonArray("titles").get(0).getAsJsonObject().get("title").getAsString(); + static ExternalMentionRecord parseWork(JsonObject work) { + Doi doi = Doi.fromString(work.get("doi").getAsString()); + URI url = URI.create("https://doi.org/" + Utils.urlEncode(doi.toString())); + String title = work.getAsJsonArray("titles").get(0).getAsJsonObject().get("title").getAsString(); - Collection authors = new ArrayList<>(); + Collection authorsBuilder = new ArrayList<>(); Iterable creators = (Iterable) work.getAsJsonArray("creators"); for (JsonObject creator : creators) { - addAuthor(authors, creator); + addAuthor(authorsBuilder, creator); } Iterable contributors = (Iterable) work.getAsJsonArray("contributors"); for (JsonObject contributor : contributors) { - addAuthor(authors, contributor); + addAuthor(authorsBuilder, contributor); } - result.authors = String.join(", ", authors); + String authors = String.join(", ", authorsBuilder); - result.publisher = Utils.stringOrNull(work.getAsJsonObject("publisher").get("name")); - result.publicationYear = Utils.integerOrNull(work.get("publicationYear")); + String publisher = Utils.stringOrNull(work.getAsJsonObject("publisher").get("name")); + Integer publicationYear = Utils.integerOrNull(work.get("publicationYear")); String doiRegistrationDateString = Utils.stringOrNull(work.get("registered")); + ZonedDateTime doiRegistrationDate = null; if (doiRegistrationDateString != null) { - result.doiRegistrationDate = ZonedDateTime.parse(doiRegistrationDateString); + doiRegistrationDate = ZonedDateTime.parse(doiRegistrationDateString); } + MentionType mentionType; String dataciteResourceTypeGeneral = Utils.stringOrNull(work.getAsJsonObject("types").get("resourceTypeGeneral")); if (dataciteResourceTypeGeneral != null && dataciteResourceTypeGeneral.equals("Text")) { String dataciteResourceType = Utils.stringOrNull(work.getAsJsonObject("types").get("resourceType")); if (dataciteResourceType != null) dataciteResourceType = dataciteResourceType.strip(); - result.mentionType = dataciteTextTypeMap.getOrDefault(dataciteResourceType, MentionType.other); + mentionType = dataciteTextTypeMap.getOrDefault(dataciteResourceType, MentionType.other); } else { - result.mentionType = dataciteTypeMap.getOrDefault(dataciteResourceTypeGeneral, MentionType.other); + mentionType = dataciteTypeMap.getOrDefault(dataciteResourceTypeGeneral, MentionType.other); } - result.source = "DataCite"; - result.version = 
Utils.stringOrNull(work.get("version")); + String version = Utils.stringOrNull(work.get("version")); // if the version is null, we can often get the version from a linked Git URL which ends in "/tree/{tag}" - if (result.version == null) { + if (version == null) { JsonArray relatedIdentifiers = work.getAsJsonArray("relatedIdentifiers"); for (JsonElement relatedIdentifier : relatedIdentifiers) { String relatedIdentifierString = Utils.stringOrNull(relatedIdentifier.getAsJsonObject().get("relatedIdentifier")); @@ -181,15 +181,28 @@ static MentionRecord parseWork(JsonObject work) { if (relatedIdentifierString != null && relatedIdentifierType != null && relatedIdentifierType.equals("URL")) { Matcher tagMatcher = URL_TREE_TAG_PATTERN.matcher(relatedIdentifierString); if (tagMatcher.find()) { - result.version = tagMatcher.group(1); + version = tagMatcher.group(1); break; } } } } - result.scrapedAt = Instant.now(); - return result; + return new ExternalMentionRecord( + doi, + doiRegistrationDate, + null, + url, + title, + authors, + publisher, + publicationYear, + null, + null, + mentionType, + "DataCite", + version + ); } static void addAuthor(Collection authors, JsonObject author) { @@ -201,26 +214,14 @@ static void addAuthor(Collection authors, JsonObject author) { else authors.add(givenName + " " + familyName); } - @Override - public Collection leastRecentlyScrapedMentions(int limit) { - throw new UnsupportedOperationException(); - } - - @Override - public Collection mentionData(Collection dois) { + public Collection mentionData(Collection dois) { if (dois.isEmpty()) { return Collections.emptyList(); } JsonObject body = new JsonObject(); - body.addProperty("query", QUERY_UNFORMATTED.formatted(joinCollection(dois))); + body.addProperty("query", QUERY_UNFORMATTED.formatted(joinDoisForGraphqlQuery(dois))); String responseJson = Utils.post("https://api.datacite.org/graphql", body.toString(), "Content-Type", "application/json"); return jsonStringToUniqueMentions(responseJson); } - - @Override - public void save(Collection mentions) { - throw new UnsupportedOperationException(); - } - } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/Doi.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/Doi.java new file mode 100644 index 000000000..034603f32 --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/Doi.java @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import nl.esciencecenter.rsd.scraper.Utils; + +import java.util.Locale; +import java.util.regex.Pattern; + +public class Doi { + + private static final Pattern DOI_PATTERN = Pattern.compile("^10(\\.\\w+)+/\\S+$"); + + private final String doiString; + + private Doi(String doiString) { + this.doiString = doiString.toLowerCase(Locale.ROOT); + } + + public static boolean isValid(String doiToTest) { + return doiToTest != null && doiToTest.length() <= 255 && DOI_PATTERN.asPredicate().test(doiToTest); + } + + public static Doi fromString(String doi) { + if (isValid(doi)) { + return new Doi(doi); + } else { + throw new IllegalArgumentException(); + } + } + + public String toUrlEncodedString() { + return Utils.urlEncode(doiString); + } + + @Override + public String toString() { + return doiString; + } + + @Override + public int hashCode() { + return doiString.hashCode(); + } + + @Override + public boolean 
equals(Object other) { + if (other == null) { + return false; + } + if (this == other) { + return true; + } + if (other instanceof Doi otherDoi) { + return doiString.equals(otherDoi.doiString); + } + + return false; + } +} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/ExternalMentionRecord.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/ExternalMentionRecord.java new file mode 100644 index 000000000..89973a23b --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/ExternalMentionRecord.java @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import java.net.URI; +import java.time.ZonedDateTime; + +public record ExternalMentionRecord( + Doi doi, + ZonedDateTime doiRegistrationDate, + OpenalexId openalexId, + URI url, + String title, + String authors, + String publisher, + Integer publicationYear, + String journal, + String page, + MentionType mentionType, + String source, + String version +) { +} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java index 93843d6b5..050d9069d 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainCitations.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -7,80 +7,87 @@ import nl.esciencecenter.rsd.scraper.Config; import nl.esciencecenter.rsd.scraper.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; -import java.time.ZonedDateTime; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.UUID; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - /* - * Main entry point for citation scraper. + * Main entry point for citation scraper. 
*/ public class MainCitations { - + private static final Logger LOGGER = LoggerFactory.getLogger(MainCitations.class); - + public static void main(String[] args) { - + LOGGER.info("Start scraping citations"); long start = System.currentTimeMillis(); try { - // Connect to the database to retrieve the - + // Connect to the database to retrieve the reference papers to scrape + String backendUrl = Config.backendBaseUrl(); PostgrestCitationRepository localCitationRepository = new PostgrestCitationRepository(backendUrl); Collection referencePapersToScrape = localCitationRepository.leastRecentlyScrapedCitations(5); OpenAlexCitations openAlexCitations = new OpenAlexCitations(); - MentionRepository localMentionRepository = new PostgrestMentionRepository(backendUrl); + PostgrestMentionRepository localMentionRepository = new PostgrestMentionRepository(backendUrl); String email = Config.crossrefContactEmail().orElse(null); - ZonedDateTime now = ZonedDateTime.now(); + Instant now = Instant.now(); for (CitationData citationData : referencePapersToScrape) { long t1 = System.currentTimeMillis(); - LOGGER.info("Scraping for {}", citationData.doi); + LOGGER.info("Scraping for DOI {}, OpenAlex ID {}", citationData.doi(), citationData.openalexId()); - Collection citingMentions = openAlexCitations.citations(citationData.doi, email, citationData.id); + Collection citingMentions = openAlexCitations.citations(citationData.openalexId(), citationData.doi(), email, citationData.id()); // we don't update mentions that have a DOI in the database with OpenAlex data, as they can already be - // scraped through Crossref of DataCite + // scraped through Crossref or DataCite long t2 = System.currentTimeMillis(); - citingMentions.removeIf(mention -> mention.doi != null && citationData.knownDois.contains(mention.doi)); - localMentionRepository.save(citingMentions); + citingMentions.removeIf(mention -> mention.doi() != null && citationData.knownDois().contains(mention.doi())); + Collection savedIds = new ArrayList<>(citingMentions.size()); + for (ExternalMentionRecord citingMention : citingMentions) { + try { + RsdMentionIds ids = localMentionRepository.createOrUpdateMentionWithOpenalexId(citingMention, now); + savedIds.add(ids); + } catch (Exception e) { + LOGGER.error("Unable to save mention with OpenAlex ID {}", citingMention.openalexId()); + Utils.saveExceptionInDatabase("Citation scraper", "mention", null, e); + } + } Collection citingMentionIds = new ArrayList<>(); - for (MentionRecord citingMention : citingMentions) { - citingMentionIds.add(citingMention.id); + for (RsdMentionIds ids : savedIds) { + citingMentionIds.add(ids.id()); } long t3 = System.currentTimeMillis(); - localCitationRepository.saveCitations(backendUrl, citationData.id, citingMentionIds, now); + localCitationRepository.saveCitations(backendUrl, citationData.id(), citingMentionIds, now); long t4 = System.currentTimeMillis(); - LOGGER.info("Scraping for {} done. OpenAlex: {} ms. Saving mentions {} ms. Saving citations {} ms. Total {} ms.", citationData.doi, (t2-t1), (t3-t2), (t4-t3), (t4-t1)); + LOGGER.info("Scraping for {} done. OpenAlex: {} ms. Saving mentions {} ms. Saving citations {} ms. 
Total {} ms.", citationData.doi(), (t2 - t1), (t3 - t2), (t4 - t3), (t4 - t1)); } } catch (IOException | InterruptedException e) { Utils.saveExceptionInDatabase("Citation scraper", null, null, e); - - if (e instanceof InterruptedException) { + + if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); - } + } } long time = System.currentTimeMillis() - start; - LOGGER.info("Done scraping citations ({} ms.)", time); } } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainMentions.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainMentions.java index 2e85bd5c2..7b996e0ad 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainMentions.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainMentions.java @@ -15,36 +15,35 @@ import org.slf4j.LoggerFactory; import java.time.Instant; -import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Map; -import java.util.TreeMap; import java.util.stream.Collectors; public class MainMentions { private static final Logger LOGGER = LoggerFactory.getLogger(MainMentions.class); - + public static void main(String[] args) { - + LOGGER.info("Start scraping mentions"); - + long t1 = System.currentTimeMillis(); - - MentionRepository localMentionRepository = new PostgrestMentionRepository(Config.backendBaseUrl()); - Collection mentionsToScrape = localMentionRepository.leastRecentlyScrapedMentions(Config.maxRequestsDoi()); + + PostgrestMentionRepository localMentionRepository = new PostgrestMentionRepository(Config.backendBaseUrl()); + Collection mentionsToScrape = localMentionRepository.leastRecentlyScrapedMentions(Config.maxRequestsDoi()); // we will remove successfully scraped mentions from here, // we use this to set scrapedAt even for failed mentions, - // to put them back at the scraping order - Map mentionsFailedToScrape = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - for (MentionRecord mentionRecord : mentionsToScrape) { - mentionsFailedToScrape.put(mentionRecord.doi, mentionRecord); + // to put them back at the scraping queue + Map mentionsFailedToScrape = new HashMap<>(); + for (RsdMentionIds mentionIds : mentionsToScrape) { + mentionsFailedToScrape.put(mentionIds.doi(), mentionIds); } String doisJoined = mentionsToScrape.stream() - .map(mention -> mention.doi) - .map(Utils::urlEncode) + .map(RsdMentionIds::doi) + .map(Doi::toUrlEncodedString) .collect(Collectors.joining(",")); String jsonSources = null; try { @@ -54,75 +53,110 @@ public static void main(String[] args) { System.exit(1); } - Map doiToSource = parseJsonSources(jsonSources); + Map doiToSource = parseJsonDoiSources(jsonSources); - Collection scrapedMentions = new ArrayList<>(); - Collection dataciteDois = doiToSource.entrySet() + Instant now = Instant.now(); + + // DATACITE + Collection dataciteDois = doiToSource.entrySet() .stream() .filter(doiSourceEntry -> doiSourceEntry.getValue().equals("DataCite")) .map(Map.Entry::getKey) + .map(Doi::fromString) .toList(); + Collection scrapedDataciteMentions = List.of(); try { - scrapedMentions.addAll(new DataciteMentionRepository().mentionData(dataciteDois)); + scrapedDataciteMentions = new DataciteMentionRepository().mentionData(dataciteDois); } catch (RuntimeException e) { Utils.saveExceptionInDatabase("DataCite mention scraper", "mention", null, e); } - for (MentionRecord scrapedMention : scrapedMentions) { - mentionsFailedToScrape.remove(scrapedMention.doi); + for (ExternalMentionRecord 
scrapedMention : scrapedDataciteMentions) { + Doi doi = scrapedMention.doi(); + RsdMentionIds ids = mentionsFailedToScrape.get(doi); + try { + RsdMentionRecord mentionToUpdate = new RsdMentionRecord(ids.id(), scrapedMention, now); + localMentionRepository.updateMention(mentionToUpdate, false); + mentionsFailedToScrape.remove(doi); + } catch (Exception e) { + LOGGER.error("Failed to update a DataCite mention with DOI {}", scrapedMention.doi()); + Utils.saveExceptionInDatabase("Mention scraper", "mention", ids.id(), e); + } + } + // END DATACITE - Collection crossrefDois = doiToSource.entrySet() + // CROSSREF + Collection crossrefDois = doiToSource.entrySet() .stream() .filter(doiSourceEntry -> doiSourceEntry.getValue().equals("Crossref")) .map(Map.Entry::getKey) + .map(Doi::fromString) .toList(); - for (String crossrefDoi : crossrefDois) { + for (Doi crossrefDoi : crossrefDois) { + ExternalMentionRecord scrapedMention; try { - MentionRecord scrapedMention = new CrossrefMention(crossrefDoi).mentionData(); - scrapedMentions.add(scrapedMention); - mentionsFailedToScrape.remove(scrapedMention.doi); + scrapedMention = new CrossrefMention(crossrefDoi).mentionData(); } catch (Exception e) { + LOGGER.error("Failed to scrape a Crossref mention with DOI {}", crossrefDoi); RuntimeException exceptionWithMessage = new RuntimeException("Failed to scrape a Crossref mention with DOI " + crossrefDoi, e); - Utils.saveExceptionInDatabase("Crossref mention scraper", "mention", null, exceptionWithMessage); + Utils.saveExceptionInDatabase("Crossref mention scraper", "mention", mentionsFailedToScrape.get(crossrefDoi).id(), exceptionWithMessage); + continue; + } + Doi doi = scrapedMention.doi(); + RsdMentionIds ids = mentionsFailedToScrape.get(doi); + RsdMentionRecord mentionToUpdate = new RsdMentionRecord(ids.id(), scrapedMention, now); + try { + localMentionRepository.updateMention(mentionToUpdate, false); + mentionsFailedToScrape.remove(doi); + } catch (Exception e) { + RuntimeException exceptionWithMessage = new RuntimeException("Failed to update a Crossref mention with DOI " + crossrefDoi, e); + Utils.saveExceptionInDatabase("Crossref mention scraper", "mention", ids.id(), exceptionWithMessage); } } + // END CROSSREF + // OPENALEX (for European Publication Office DOIs) String email = Config.crossrefContactEmail().orElse(null); - Collection europeanPublicationsOfficeDois = doiToSource.entrySet() + Collection scrapedOpenalexMentions = List.of(); + Collection europeanPublicationsOfficeDois = doiToSource.entrySet() .stream() .filter(doiSourceEntry -> doiSourceEntry.getValue().equals("OP")) .map(Map.Entry::getKey) + .map(Doi::fromString) .toList(); try { - Collection openalexMentions = new OpenAlexCitations().mentionData(europeanPublicationsOfficeDois, email); - for (MentionRecord openalexMention : openalexMentions) { - mentionsFailedToScrape.remove(openalexMention.doi); - scrapedMentions.add(openalexMention); - } + scrapedOpenalexMentions = new OpenAlexCitations().mentionData(europeanPublicationsOfficeDois, email); } catch (Exception e) { - Utils.saveExceptionInDatabase("DataCite mention scraper", "mention", null, e); + Utils.saveExceptionInDatabase("OpenAlex mention scraper", "mention", null, e); } - - Instant now = Instant.now(); - for (MentionRecord mention : mentionsFailedToScrape.values()) { - mention.scrapedAt = now; - LOGGER.info("Failed to scrape mention with DOI {}", mention.doi); + for (ExternalMentionRecord scrapedMention : scrapedOpenalexMentions) { + Doi doi = scrapedMention.doi(); + RsdMentionIds 
ids = mentionsFailedToScrape.get(doi); + RsdMentionRecord mentionToUpdate = new RsdMentionRecord(ids.id(), scrapedMention, now); + try { + localMentionRepository.updateMention(mentionToUpdate, true); + mentionsFailedToScrape.remove(doi); + } catch (Exception e) { + LOGGER.error("Failed to update an OpenAlex mention with DOI {}", scrapedMention.doi()); + Utils.saveExceptionInDatabase("Mention scraper", "mention", ids.id(), e); + } } - scrapedMentions.addAll(mentionsFailedToScrape.values()); + // END OPENALEX - - try { - localMentionRepository.save(scrapedMentions); - } catch (RuntimeException e) { - Utils.saveExceptionInDatabase("Mention scraper", "mention", null, e); + for (RsdMentionIds ids : mentionsFailedToScrape.values()) { + LOGGER.error("Failed to scrape mention with DOI {}", ids.doi()); + try { + localMentionRepository.saveScrapedAt(ids, now); + } catch (RuntimeException e) { + Utils.saveExceptionInDatabase("Mention scraper", "mention", ids.id(), e); + } } long time = System.currentTimeMillis() - t1; - LOGGER.info("Done scraping mentions ({} ms.)", time); } - static Map parseJsonSources(String jsonSources) { + static Map parseJsonDoiSources(String jsonSources) { JsonArray sourcesArray = JsonParser.parseString(jsonSources).getAsJsonArray(); Map result = new HashMap<>(); for (JsonElement jsonElement : sourcesArray) { diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainReleases.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainReleases.java index 49400429f..98cb8869d 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainReleases.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MainReleases.java @@ -1,20 +1,21 @@ -// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 package nl.esciencecenter.rsd.scraper.doi; import nl.esciencecenter.rsd.scraper.Config; +import nl.esciencecenter.rsd.scraper.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.time.Instant; import java.util.Collection; +import java.util.HashMap; import java.util.Map; -import java.util.TreeMap; import java.util.UUID; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /* * 1. Get the least recently scraped releases from software with a concept DOI. We also check for existing releases that already exist as a mention in the database, so we don't have to (TODO) recreate them later. * 2. For each release check if it's a concept DOI on DataCite and get all the versioned DOIs. 
@@ -23,37 +24,42 @@ public class MainReleases { private static final Logger LOGGER = LoggerFactory.getLogger(MainReleases.class); - + public static void main(String[] args) { - + LOGGER.info("Start scraping releases"); - + long t1 = System.currentTimeMillis(); - + PostgrestReleaseRepository releaseRepository = new PostgrestReleaseRepository(Config.backendBaseUrl()); Collection releasesToScrape = releaseRepository.leastRecentlyScrapedReleases(Config.maxRequestsDoi()); - Collection conceptDoisToScrape = releasesToScrape.stream() + Collection conceptDoisToScrape = releasesToScrape.stream() .map(releaseData -> releaseData.conceptDoi) .toList(); - Map> scrapedReleasesPerConceptDoi = new DataCiteReleaseRepository().getVersionedDois(conceptDoisToScrape); + Map> scrapedReleasesPerConceptDoi = new DataCiteReleaseRepository().getVersionedDois(conceptDoisToScrape); - MentionRepository localMentionRepository = new PostgrestMentionRepository(Config.backendBaseUrl()); - Collection allMentions = scrapedReleasesPerConceptDoi.values().stream() + Instant now = Instant.now(); + PostgrestMentionRepository localMentionRepository = new PostgrestMentionRepository(Config.backendBaseUrl()); + Collection allMentions = scrapedReleasesPerConceptDoi.values().stream() .flatMap(Collection::stream) .toList(); - localMentionRepository.save(allMentions); - Map doiToId = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - for (MentionRecord mention : allMentions) { - doiToId.put(mention.doi, mention.id); + Map doiToId = new HashMap<>(); + for (ExternalMentionRecord mention : allMentions) { + try { + RsdMentionIds ids = localMentionRepository.createMentionIfNotExistsOnDoiAndGetIds(mention, now); + doiToId.put(mention.doi(), ids.id()); + } catch (Exception e) { + LOGGER.error("Unable to save mention with DOI {}", mention.doi()); + Utils.saveExceptionInDatabase("Releases scraper", "mention", null, e); + } } releaseRepository.saveReleaseContent(releasesToScrape, scrapedReleasesPerConceptDoi, doiToId); - + long time = System.currentTimeMillis() - t1; - LOGGER.info("Done scraping releases ({} ms.)", time); } } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/Mention.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/Mention.java deleted file mode 100644 index 654201b09..000000000 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/Mention.java +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center -// -// SPDX-License-Identifier: Apache-2.0 - -package nl.esciencecenter.rsd.scraper.doi; - -import java.io.IOException; - -import nl.esciencecenter.rsd.scraper.RsdResponseException; - -public interface Mention { - - MentionRecord mentionData() throws IOException, InterruptedException, RsdResponseException; -} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRecord.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRecord.java deleted file mode 100644 index 421e4e0f5..000000000 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRecord.java +++ /dev/null @@ -1,52 +0,0 @@ -// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center -// -// SPDX-License-Identifier: Apache-2.0 - -package nl.esciencecenter.rsd.scraper.doi; - -import java.net.URI; -import java.time.Instant; -import java.time.ZonedDateTime; -import 
java.util.UUID; - -public class MentionRecord { - UUID id; - String doi; - URI url; - String title; - String authors; - String publisher; - Integer publicationYear; - ZonedDateTime doiRegistrationDate; - String journal; - String page; - URI imageUrl; - MentionType mentionType; - String externalId; - String source; - Instant scrapedAt; - String version; - - @Override - public String toString() { - return "MentionRecord{" + - "id=" + id + - ", doi='" + doi + '\'' + - ", url=" + url + - ", title='" + title + '\'' + - ", authors='" + authors + '\'' + - ", publisher='" + publisher + '\'' + - ", publicationYear=" + publicationYear + - ", doiRegistrationDate=" + doiRegistrationDate + - ", journal='" + journal + '\'' + - ", page='" + page + '\'' + - ", imageUrl=" + imageUrl + - ", mentionType=" + mentionType + - ", externalId='" + externalId + '\'' + - ", source='" + source + '\'' + - ", scrapedAt=" + scrapedAt + - ", version='" + version + '\'' + - '}'; - } -} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRepository.java deleted file mode 100644 index 5a1abee81..000000000 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/MentionRepository.java +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center -// -// SPDX-License-Identifier: Apache-2.0 - -package nl.esciencecenter.rsd.scraper.doi; - -import java.util.Collection; - -public interface MentionRepository { - - Collection leastRecentlyScrapedMentions(int limit); - - Collection mentionData(Collection dois); - - void save(Collection mentions); -} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java index 2958dac86..1e997b8b8 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenAlexCitations.java @@ -10,11 +10,12 @@ import com.google.gson.JsonObject; import com.google.gson.JsonParser; import nl.esciencecenter.rsd.scraper.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.URI; import java.net.http.HttpResponse; -import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -27,10 +28,17 @@ class OpenAlexCitations { - static final String DOI_FILTER_URL_UNFORMATTED = "https://api.openalex.org/works?filter=doi:%s"; + private static final Logger LOGGER = LoggerFactory.getLogger(OpenAlexCitations.class); - public Collection mentionData(Collection dataciteDois, String email) throws IOException, InterruptedException { - String filter = dataciteDois.stream().filter(Objects::nonNull).collect(Collectors.joining("|")); + static final String DOI_FILTER_URL_UNFORMATTED = "https://api.openalex.org/works?filter=doi:%s"; + static final String OPENALEX_ID_URL_UNFORMATTED = "https://api.openalex.org/works?filter=ids.openalex:%s"; + + public Collection mentionData(Collection dataciteDois, String email) throws IOException, InterruptedException { + String filter = dataciteDois + .stream() + .filter(Objects::nonNull) + .map(Doi::toString) + .collect(Collectors.joining("|")); String worksUri = DOI_FILTER_URL_UNFORMATTED.formatted(Utils.urlEncode(filter)) + "&per-page=200"; HttpResponse 
response; @@ -44,12 +52,11 @@ public Collection mentionData(Collection dataciteDois, St JsonArray citationsArray = tree .getAsJsonArray("results"); - Collection mentions = new ArrayList<>(); - Instant now = Instant.now(); + Collection mentions = new ArrayList<>(); for (JsonElement citation : citationsArray) { - MentionRecord citationAsMention; + ExternalMentionRecord citationAsMention; try { - citationAsMention = parseCitationAsMention(citation, now); + citationAsMention = parseCitationAsMention(citation); } catch (RuntimeException e) { Utils.saveExceptionInDatabase("OpenAlex mention scraper", "mention", null, e); continue; @@ -60,10 +67,15 @@ public Collection mentionData(Collection dataciteDois, St return mentions; } - public Collection citations(String doi, String email, UUID id) throws IOException, InterruptedException { + public Collection citations(OpenalexId openalexId, Doi doi, String email, UUID id) throws IOException, InterruptedException { + // This shouldn't happen, but let's check it to prevent unexpected exceptions: + if (doi == null && openalexId == null) { + return Collections.emptyList(); + } - String doiUrlEncoded = Utils.urlEncode(doi); - String worksUri = DOI_FILTER_URL_UNFORMATTED.formatted(doiUrlEncoded); + String worksUri = openalexId != null + ? OPENALEX_ID_URL_UNFORMATTED.formatted(openalexId.toUrlEncodedString()) + : DOI_FILTER_URL_UNFORMATTED.formatted(doi.toUrlEncodedString()); Optional optionalCitationsUri = citationsUri(worksUri, email); if (optionalCitationsUri.isEmpty()) { @@ -88,27 +100,38 @@ static Optional citationsUri(String worksUri, String email) throws IOExc .getAsJsonPrimitive("count") .getAsInt(); - if (count == 0 || count > 1) { + if (count < 1) { + LOGGER.warn("No results found for {}: {}", worksUri, count); return Optional.empty(); } - String citationsUri = tree - .getAsJsonArray("results") - .get(0) - .getAsJsonObject() - .getAsJsonPrimitive("cited_by_api_url") - .getAsString(); + if (count > 1) { + LOGGER.warn("More than 1 result found for {}: {}, taking the first", worksUri, count); + } + + String citationsUri = null; + try { + citationsUri = tree + .getAsJsonArray("results") + .get(0) + .getAsJsonObject() + .getAsJsonPrimitive("cited_by_api_url") + .getAsString(); + } catch (RuntimeException e) { + LOGGER.error("Exception parsing cited_by_api_url for %s".formatted(worksUri), e); + Utils.saveExceptionInDatabase("OpenAlex citations scraper", null, null, e); + } - return Optional.of(citationsUri); + return Optional.ofNullable(citationsUri); } // we use cursor paging as that will always work // https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging#cursor-paging - static Collection scrapeCitations(String citationsUri, String email, UUID id) throws IOException, InterruptedException { + static Collection scrapeCitations(String citationsUri, String email, UUID id) throws IOException, InterruptedException { final int perPage = 200; String cursor = "*"; - Collection citations = new ArrayList<>(); + Collection citations = new ArrayList<>(); while (cursor != null) { HttpResponse response; String citationsUriWithCursor = citationsUri + "&per-page=" + perPage + "&cursor=" + cursor; @@ -127,11 +150,10 @@ static Collection scrapeCitations(String citationsUri, String ema JsonArray citationsArray = tree .getAsJsonArray("results"); - Instant now = Instant.now(); for (JsonElement citation : citationsArray) { - MentionRecord citationAsMention; + ExternalMentionRecord citationAsMention; try { - citationAsMention = 
parseCitationAsMention(citation, now); + citationAsMention = parseCitationAsMention(citation); } catch (RuntimeException e) { Utils.saveExceptionInDatabase("Citation scraper", "mention", id, e); continue; @@ -143,70 +165,68 @@ static Collection scrapeCitations(String citationsUri, String ema return citations; } - static MentionRecord parseCitationAsMention(JsonElement element, Instant scrapedAt) { + static ExternalMentionRecord parseCitationAsMention(JsonElement element) { JsonObject citationObject = element.getAsJsonObject(); - MentionRecord mention = new MentionRecord(); - String doiUrl = Utils.stringOrNull(citationObject.get("doi")); - String doi = doiUrl; - if (doi != null) { - doi = doi.replace("https://doi.org/", ""); + String doiString = doiUrl; + if (doiString != null) { + doiString = doiString.replace("https://doi.org/", ""); } - mention.doi = doi; + Doi doi = doiString == null ? null : Doi.fromString(doiString); + URI url; if (doiUrl != null) { - mention.url = URI.create(doiUrl); + url = URI.create(doiUrl); } else { JsonArray locations = citationObject.getAsJsonArray("locations"); - mention.url = extractUrlFromLocation(locations); + url = extractUrlFromLocation(locations); } - mention.title = Utils.stringOrNull(citationObject.get("title")); - if (mention.title == null) { + String title = Utils.stringOrNull(citationObject.get("title")); + if (title == null) { String openAlexId = citationObject.getAsJsonPrimitive("id").getAsString(); - String message = "The title of the mention with DOI %s and OpenAlex ID %s is null".formatted(doi, openAlexId); + String message = "The title of the mention with DOI %s and OpenAlex ID %s is null".formatted(doiString, openAlexId); throw new RuntimeException(message); } JsonArray authorsArray = citationObject.getAsJsonArray("authorships"); - mention.authors = StreamSupport.stream(authorsArray.spliterator(), false) + String authors = StreamSupport.stream(authorsArray.spliterator(), false) .map(JsonElement::getAsJsonObject) .map(jo -> jo.get("raw_author_name")) .filter(Predicate.not(JsonElement::isJsonNull)) .map(JsonElement::getAsString) .collect(Collectors.joining(", ")); - if (mention.authors.isBlank()) { - mention.authors = null; + if (authors.isBlank()) { + authors = null; } - mention.publisher = null; - - mention.publicationYear = Utils.integerOrNull(citationObject.get("publication_year")); - - mention.doiRegistrationDate = null; - - mention.journal = null; - - mention.page = null; - - mention.imageUrl = null; + Integer publicationYear = Utils.integerOrNull(citationObject.get("publication_year")); String crossrefType = Utils.stringOrNull(citationObject.get("type_crossref")); - mention.mentionType = CrossrefMention.crossrefTypeMap.getOrDefault(crossrefType, MentionType.other); + MentionType mentionType = CrossrefMention.crossrefTypeMap.getOrDefault(crossrefType, MentionType.other); - mention.externalId = citationObject + String openalexIdString = citationObject .getAsJsonObject("ids") .getAsJsonPrimitive("openalex") .getAsString(); - - mention.source = "OpenAlex"; - - mention.scrapedAt = scrapedAt; - - mention.version = null; - - return mention; + OpenalexId openalexId = OpenalexId.fromString(openalexIdString); + + return new ExternalMentionRecord( + doi, + null, + openalexId, + url, + title, + authors, + null, + publicationYear, + null, + null, + mentionType, + "OpenAlex", + null + ); } static URI extractUrlFromLocation(JsonArray locations) { diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenalexId.java 
b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenalexId.java new file mode 100644 index 000000000..def269a88 --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/OpenalexId.java @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import nl.esciencecenter.rsd.scraper.Utils; + +import java.util.Locale; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +// https://docs.openalex.org/how-to-use-the-api/get-single-entities#the-openalex-id +public class OpenalexId { + + private static final Pattern OPENALEX_PATTERN = Pattern.compile("^https://openalex\\.org/([WwAaSsIiCcPpFf]\\d{3,13})$"); + private static final String OPENALEX_ID_BASE = "https://openalex.org/"; + + private final String openalexKey; + + private OpenalexId(String openalexString) { + this.openalexKey = openalexString.toUpperCase(Locale.ROOT); + } + + public static boolean isValid(String idToTest) { + return idToTest != null && OPENALEX_PATTERN.asPredicate().test(idToTest); + } + + public static OpenalexId fromString(String id) { + if (id == null) { + throw new IllegalArgumentException("The ID cannot be null"); + } + Matcher matcher = OPENALEX_PATTERN.matcher(id); + + if (!matcher.find()) { + throw new IllegalArgumentException("This is an invalid OpenAlex ID"); + } + + String key = matcher.group(1); + return new OpenalexId(key); + } + + public String toUrlEncodedString() { + return Utils.urlEncode(toString()); + } + + @Override + public String toString() { + return OPENALEX_ID_BASE + openalexKey; + } + + @Override + public int hashCode() { + return openalexKey.hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (this == other) { + return true; + } + if (other instanceof OpenalexId otherOpenalexId) { + return openalexKey.equals(otherOpenalexId.openalexKey); + } + + return false; + } +} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestCitationRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestCitationRepository.java index 34a5ff60b..cad4b1ba0 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestCitationRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestCitationRepository.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -11,32 +11,30 @@ import com.google.gson.JsonParser; import nl.esciencecenter.rsd.scraper.Utils; -import java.time.ZonedDateTime; -import java.time.format.DateTimeFormatter; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; -import java.util.Objects; -import java.util.TreeSet; import java.util.HashSet; +import java.util.Objects; import java.util.UUID; /** - * This class provides access to the citation related tables via the Postgrest API. + * This class provides access to the citation related tables via the Postgrest API. */ public class PostgrestCitationRepository { - // The base URL of the backend. + // The base URL of the backend. 
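As a quick illustration of the OpenalexId value class introduced above: the work key is normalised to upper case on construction, so equality and hashing ignore case, isValid only accepts the full https://openalex.org/... form, and toUrlEncodedString() yields the encoded form used in query strings elsewhere in this patch. A usage sketch follows; the package line reflects the file paths in this diff, and the exact encoded output depends on Utils.urlEncode, which is not shown in this excerpt.

package nl.esciencecenter.rsd.scraper.doi;

class OpenalexIdUsageSketch {

	public static void main(String[] args) {
		// The key is stored upper-cased, so IDs differing only in case are equal.
		OpenalexId id = OpenalexId.fromString("https://openalex.org/w3160330321");
		System.out.println(id); // https://openalex.org/W3160330321
		System.out.println(id.equals(OpenalexId.fromString("https://openalex.org/W3160330321"))); // true

		// Bare work keys are rejected; the full URL form is required.
		System.out.println(OpenalexId.isValid("W3160330321")); // false

		// Encoded form used when the ID is put into OpenAlex or PostgREST query strings.
		System.out.println(id.toUrlEncodedString());
	}
}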
private final String backendUrl; - + public PostgrestCitationRepository(String backendUrl) { this.backendUrl = Objects.requireNonNull(backendUrl); } /** * Retrieve the least recently scraped reference papers from the database. - * + * * @param limit the maximum number of references to return - * @return A collection of citation data representing these reference papers. + * @return A collection of citation data representing these reference papers. */ public Collection leastRecentlyScrapedCitations(int limit) { String oneHourAgoFilter = Utils.atLeastOneHourAgoFilter("citations_scraped_at"); @@ -45,23 +43,23 @@ public Collection leastRecentlyScrapedCitations(int limit) { return parseJson(data); } - public void saveCitations(String backendUrl, UUID idCitedMention, Collection citingMentions, ZonedDateTime scrapedAt) { - String jsonPatch = "{\"citations_scraped_at\": \"%s\"}".formatted(scrapedAt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)); + public void saveCitations(String backendUrl, UUID idCitedMention, Collection citingMentions, Instant scrapedAt) { + String jsonPatch = "{\"citations_scraped_at\": \"%s\"}".formatted(scrapedAt.toString()); Utils.patchAsAdmin(backendUrl + "/mention?id=eq." + idCitedMention.toString(), jsonPatch); JsonArray jsonArray = new JsonArray(); - // We sometimes encouter duplicate citations which may lead to the operation to fail. + // We sometimes encounter duplicate citations which may lead to the operation to fail. HashSet seen = new HashSet<>(); for (UUID citingMention : citingMentions) { if (citingMention != null) { - String citationID = citingMention.toString(); + String citationID = citingMention.toString(); if (!seen.contains(citationID)) { seen.add(citationID); - JsonObject jsonObject = new JsonObject(); + JsonObject jsonObject = new JsonObject(); jsonObject.addProperty("mention", idCitedMention.toString()); jsonObject.addProperty("citation", citationID); jsonArray.add(jsonObject); @@ -75,23 +73,26 @@ public void saveCitations(String backendUrl, UUID idCitedMention, Collection parseJson(String data) { - + JsonArray array = JsonParser.parseString(data).getAsJsonArray(); Collection result = new ArrayList<>(); for (JsonElement jsonElement : array) { JsonObject jsonObject = jsonElement.getAsJsonObject(); UUID id = UUID.fromString(jsonObject.getAsJsonPrimitive("id").getAsString()); - String doi = jsonObject.getAsJsonPrimitive("doi").getAsString(); + String doiString = Utils.stringOrNull(jsonObject.get("doi")); + Doi doi = doiString == null ? null : Doi.fromString(doiString); + String openalexIdString = Utils.stringOrNull(jsonObject.get("openalex_id")); + OpenalexId openalexId = openalexIdString == null ? 
null : OpenalexId.fromString(openalexIdString); - Collection knownDois = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - JsonArray doisArray = jsonObject.getAsJsonArray("known_dois"); + Collection knownDois = new HashSet<>(); + JsonArray doisArray = jsonObject.getAsJsonArray("known_citing_dois"); for (JsonElement element : doisArray) { - knownDois.add(element.getAsString()); + knownDois.add(Doi.fromString(element.getAsString())); } - result.add(new CitationData(id, doi, knownDois)); + result.add(new CitationData(id, doi, openalexId, knownDois)); } return result; diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java index 768731c81..1b67a7068 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestMentionRepository.java @@ -5,29 +5,21 @@ package nl.esciencecenter.rsd.scraper.doi; -import com.google.gson.FieldNamingPolicy; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; -import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; import com.google.gson.JsonParser; -import com.google.gson.JsonPrimitive; -import com.google.gson.JsonSerializer; -import com.google.gson.reflect.TypeToken; import nl.esciencecenter.rsd.scraper.Utils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.net.URI; import java.time.Instant; import java.time.ZonedDateTime; +import java.util.ArrayList; import java.util.Collection; import java.util.Objects; import java.util.UUID; -public class PostgrestMentionRepository implements MentionRepository { - - private static final Logger LOGGER = LoggerFactory.getLogger(PostgrestMentionRepository.class); +public class PostgrestMentionRepository { private final String backendUrl; @@ -35,102 +27,119 @@ public PostgrestMentionRepository(String backendUrl) { this.backendUrl = Objects.requireNonNull(backendUrl); } - static Collection parseJson(String data) { - return new GsonBuilder() - .setFieldNamingPolicy(FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES) - .registerTypeAdapter(Instant.class, (JsonDeserializer) (json, typeOfT, context) -> Instant.parse(json.getAsString())) - .registerTypeAdapter(ZonedDateTime.class, (JsonDeserializer) (json, typeOfT, context) -> ZonedDateTime.parse(json.getAsString())) - .registerTypeAdapter(URI.class, (JsonDeserializer) (json, typeOfT, context) -> { - try { - return URI.create(json.getAsString()); - } catch (IllegalArgumentException e) { - LOGGER.warn("Could not create a URI of {} ", json.getAsString()); - return null; - } - }) - .create() - .fromJson(data, new TypeToken>() { - }.getType()); + static Collection parseMultipleRsdIds(String json) { + Collection result = new ArrayList<>(); + + JsonArray rootArray = JsonParser.parseString(json).getAsJsonArray(); + for (JsonElement jsonElement : rootArray) { + + JsonObject arrayEntry = jsonElement.getAsJsonObject(); + UUID id = UUID.fromString(arrayEntry.getAsJsonPrimitive("id").getAsString()); + String doiString = Utils.stringOrNull(arrayEntry.get("doi")); + Doi doi = doiString == null ? null : Doi.fromString(doiString); + String openalexIdString = Utils.stringOrNull(arrayEntry.get("openalex_id")); + OpenalexId openalexId = openalexIdString == null ? 
null : OpenalexId.fromString(openalexIdString); + + result.add(new RsdMentionIds(id, doi, openalexId)); + } + + return result; } - @Override - public Collection leastRecentlyScrapedMentions(int limit) { - String data = Utils.getAsAdmin(backendUrl + "/mention?doi=not.is.null&order=scraped_at.asc.nullsfirst&limit=" + limit); - return parseJson(data); + static RsdMentionIds parseSingleRsdIds(String json) { + JsonObject root = JsonParser.parseString(json).getAsJsonArray().get(0).getAsJsonObject(); + + UUID id = UUID.fromString(root.getAsJsonPrimitive("id").getAsString()); + String doiString = Utils.stringOrNull(root.get("doi")); + Doi doi = doiString == null ? null : Doi.fromString(doiString); + String openalexIdString = Utils.stringOrNull(root.get("openalex_id")); + OpenalexId openalexId = openalexIdString == null ? null : OpenalexId.fromString(openalexIdString); + + return new RsdMentionIds(id, doi, openalexId); + } + + public Collection leastRecentlyScrapedMentions(int limit) { + String data = Utils.getAsAdmin(backendUrl + "/mention?doi=not.is.null&order=scraped_at.asc.nullsfirst&select=id,doi,openalex_id&limit=" + limit); + return parseMultipleRsdIds(data); } - @Override - public Collection mentionData(Collection dois) { - throw new UnsupportedOperationException(); + public void saveScrapedAt(RsdMentionIds ids, Instant scrapedAt) { + JsonObject root = new JsonObject(); + root.addProperty("scraped_at", scrapedAt.toString()); + Utils.patchAsAdmin(backendUrl + "/mention?select=id,doi,openalex_id&id=eq." + ids.id(), root.toString(), "Prefer", "return=representation"); } - @Override - public void save(Collection mentions) { - Gson gson = new GsonBuilder() - .serializeNulls() - .setFieldNamingPolicy(FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES) - .registerTypeAdapter(Instant.class, (JsonSerializer) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString())) - .registerTypeAdapter(ZonedDateTime.class, (JsonSerializer) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString())) - .create(); - - LOGGER.info("Will save {} mentions", mentions.size()); - - for (MentionRecord mention : mentions) { - String scrapedMentionJson = gson.toJson(mention); - String onConflictFilter; - - if (mention.doi != null) { - onConflictFilter = "doi"; - } else { - onConflictFilter = "external_id,source"; - } - - String uri = "%s/mention?on_conflict=%s&select=id".formatted(backendUrl, onConflictFilter); - String response; - - try { - LOGGER.debug("Saving mention: {} / {} / {}", mention.doi, mention.externalId, mention.source); - response = Utils.postAsAdmin(uri, scrapedMentionJson, "Prefer", "resolution=merge-duplicates,return=representation"); - - JsonArray responseAsArray = JsonParser.parseString(response).getAsJsonArray(); - // Used in MainCitations, do not remove - mention.id = UUID.fromString(responseAsArray.get(0).getAsJsonObject().getAsJsonPrimitive("id").getAsString()); - - } catch (RuntimeException e) { - - LOGGER.warn("Failed to save mention: {} / {} / {}", mention.doi, mention.externalId, mention.source, e); - - String metadataMessage = "Failed to save mention: DOI %s, external ID %s, source %s".formatted(mention.doi, mention.externalId, mention.source); - RuntimeException exceptionWithMetadata = new RuntimeException(metadataMessage, e); - if (mention.doi == null) { - Utils.saveExceptionInDatabase("Mention scraper", "mention", null, exceptionWithMetadata); - } else { - // We will try to update the scraped_at field, so that it goes back into the queue for being scraped - // Note that this operation in 
itself may also fail. - try { - String existingMentionResponse = Utils.getAsAdmin("%s/mention?doi=eq.%s&select=id".formatted(backendUrl, mention.doi)); - JsonArray array = JsonParser.parseString(existingMentionResponse).getAsJsonArray(); - if (array.size() == 1) { - String id = array.get(0).getAsJsonObject().getAsJsonPrimitive("id").getAsString(); - Utils.saveErrorMessageInDatabase(null, - "mention", - null, - id, - "id", - ZonedDateTime.now(), - "scraped_at"); - - Utils.saveExceptionInDatabase("Mention scraper", "mention", UUID.fromString(id), exceptionWithMetadata); - } else { - Utils.saveExceptionInDatabase("Mention scraper", "mention", null, exceptionWithMetadata); - } - } catch (Exception e2) { - LOGGER.warn("Failed to save exception in database", e2); - } - } - - } + public RsdMentionIds updateMention(RsdMentionRecord mention, boolean updateOpenAlexId) { + JsonObject root = createJsonFromMentionData(mention.content(), updateOpenAlexId); + root.addProperty("scraped_at", mention.scrapedAt().toString()); + String response = Utils.patchAsAdmin(backendUrl + "/mention?select=id,doi,openalex_id&id=eq." + mention.id(), root.toString(), "Prefer", "return=representation"); + return parseSingleRsdIds(response); + } + + public RsdMentionIds createMentionIfNotExistsOnDoiAndGetIds(ExternalMentionRecord mention, Instant scrapedAt) { + Doi doi = mention.doi(); + Objects.requireNonNull(doi); + Collection mentionsWithDoi = parseMultipleRsdIds(Utils.getAsAdmin(backendUrl + "/mention?select=id,doi,openalex_id&doi=eq." + doi.toUrlEncodedString())); + if (mentionsWithDoi.size() == 1) { + return mentionsWithDoi.iterator().next(); + } + + return createNewMention(mention, scrapedAt, false); + } + + public RsdMentionIds createOrUpdateMentionWithOpenalexId(ExternalMentionRecord mention, Instant scrapedAt) { + OpenalexId openalexId = Objects.requireNonNull(mention.openalexId()); + Doi doi = mention.doi(); + + String query = "/mention?select=id,doi,openalex_id"; + if (mention.doi() != null) { + query += "&or=(openalex_id.eq.%s,doi.eq.%s)".formatted(openalexId.toUrlEncodedString(), doi.toUrlEncodedString()); + } else { + query += "&openalex_id=eq.%s".formatted(openalexId.toUrlEncodedString()); + } + String existingMentionsResponse = Utils.getAsAdmin(backendUrl + query); + Collection existingIds = parseMultipleRsdIds(existingMentionsResponse); + + if (existingIds.size() > 1) { + throw new RuntimeException("Multiple entries with DOI %s or OpenAlex id %s exist, they should be merged".formatted(mention.doi(), openalexId)); + } + if (existingIds.size() == 1) { + UUID id = existingIds.iterator().next().id(); + return updateMention(new RsdMentionRecord(id, mention, scrapedAt), true); + } + + return createNewMention(mention, scrapedAt, true); + } + + private RsdMentionIds createNewMention(ExternalMentionRecord mention, Instant scrapedAt, boolean setOpenAlexId) { + JsonObject root = createJsonFromMentionData(mention, setOpenAlexId); + root.addProperty("scraped_at", scrapedAt.toString()); + String response = Utils.postAsAdmin(backendUrl + "/mention?select=id,doi,openalex_id", root.toString(), "Prefer", "return=representation"); + return parseSingleRsdIds(response); + } + static JsonObject createJsonFromMentionData(ExternalMentionRecord mention, boolean setOpenAlexId) { + JsonObject root = new JsonObject(); + Doi doi = mention.doi(); + root.addProperty("doi", doi == null ? 
null : mention.doi().toString()); + ZonedDateTime doiRegistrationDate = mention.doiRegistrationDate(); + root.addProperty("doi_registration_date", doiRegistrationDate == null ? null : doiRegistrationDate.toString()); + if (setOpenAlexId) { + root.addProperty("openalex_id", mention.openalexId().toString()); } + URI url = mention.url(); + root.addProperty("url", url == null ? null : mention.url().toString()); + root.addProperty("title", mention.title()); + root.addProperty("authors", mention.authors()); + root.addProperty("publisher", mention.publisher()); + root.addProperty("publication_year", mention.publicationYear()); + root.addProperty("journal", mention.journal()); + root.addProperty("page", mention.page()); + root.addProperty("mention_type", mention.mentionType().name()); + root.addProperty("source", mention.source()); + root.addProperty("version", mention.version()); + + return root; } } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestReleaseRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestReleaseRepository.java index 66e075036..51e13c3a8 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestReleaseRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/PostgrestReleaseRepository.java @@ -9,6 +9,7 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; +import com.google.gson.JsonDeserializer; import com.google.gson.JsonObject; import com.google.gson.reflect.TypeToken; import nl.esciencecenter.rsd.scraper.Config; @@ -17,9 +18,9 @@ import java.time.Instant; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.Map; import java.util.Objects; -import java.util.TreeMap; import java.util.UUID; public class PostgrestReleaseRepository { @@ -35,7 +36,7 @@ public Collection leastRecentlyScrapedReleases(int limit) { return parseJson(data); } - public void saveReleaseContent(Collection releaseData, Map> conceptDoiToDois, Map versionDoiToMentionId) { + public void saveReleaseContent(Collection releaseData, Map> conceptDoiToDois, Map versionDoiToMentionId) { // First update the releases_scraped_at column. JsonArray releasesBody = new JsonArray(); Instant now = Instant.now(); @@ -49,31 +50,35 @@ public void saveReleaseContent(Collection releaseData, Map> conceptDoiToSoftwareIds = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); + // For each scraped or existing version as a mention, we need to know its id in the mention table and the ids (plural, because multiple software entries can have the same concept DOI) of the software to which it belongs. 
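To make the comment above concrete: the loops that follow emit one coupling row per combination of software entry and versioned DOI, storing the software id under release_id and the mention-table id of the versioned DOI under mention_id. A sketch with hypothetical UUIDs (two software entries sharing one concept DOI that has two versioned DOIs gives four rows):

// Illustration only; the ids are made up, the property names come from the code below.
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;

import java.util.List;
import java.util.UUID;

class ReleaseCouplingSketch {

	public static void main(String[] args) {
		List<UUID> softwareIds = List.of(UUID.randomUUID(), UUID.randomUUID());       // entries sharing one concept DOI
		List<UUID> versionMentionIds = List.of(UUID.randomUUID(), UUID.randomUUID()); // mentions of its versioned DOIs

		JsonArray coupling = new JsonArray();
		for (UUID softwareId : softwareIds) {
			for (UUID mentionId : versionMentionIds) {
				JsonObject couple = new JsonObject();
				couple.addProperty("release_id", softwareId.toString());
				couple.addProperty("mention_id", mentionId.toString());
				coupling.add(couple);
			}
		}
		System.out.println(coupling); // four {release_id, mention_id} rows
	}
}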
+ Map> conceptDoiToSoftwareIds = new HashMap<>(); for (ReleaseData release : releaseData) { Collection softwareIds = conceptDoiToSoftwareIds.computeIfAbsent(release.conceptDoi, k -> new ArrayList<>()); softwareIds.add(release.softwareId); } - Map versionDoiToConceptDoi = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); - for (Map.Entry> conceptDoiToDoisEntry : conceptDoiToDois.entrySet()) { - String conceptDoi = conceptDoiToDoisEntry.getKey(); - Collection versionDois = conceptDoiToDoisEntry.getValue(); - for (MentionRecord version : versionDois) { - versionDoiToConceptDoi.put(version.doi, conceptDoi); + Map versionDoiToConceptDoi = new HashMap<>(); + for (Map.Entry> conceptDoiToDoisEntry : conceptDoiToDois.entrySet()) { + Doi conceptDoi = conceptDoiToDoisEntry.getKey(); + Collection versionDois = conceptDoiToDoisEntry.getValue(); + for (ExternalMentionRecord version : versionDois) { + versionDoiToConceptDoi.put(version.doi(), conceptDoi); } } JsonArray coupling = new JsonArray(); - for (Map.Entry entry : versionDoiToConceptDoi.entrySet()) { - String versionDoi = entry.getKey(); - String conceptDoi = entry.getValue(); + for (Map.Entry entry : versionDoiToConceptDoi.entrySet()) { + Doi versionDoi = entry.getKey(); + Doi conceptDoi = entry.getValue(); Collection softwareIds = conceptDoiToSoftwareIds.get(conceptDoi); for (UUID softwareId : softwareIds) { JsonObject couple = new JsonObject(); + UUID mentionId = versionDoiToMentionId.get(versionDoi); + if (mentionId == null) { + continue; + } couple.addProperty("release_id", softwareId.toString()); - couple.addProperty("mention_id", versionDoiToMentionId.get(versionDoi).toString()); + couple.addProperty("mention_id", mentionId.toString()); coupling.add(couple); } } @@ -82,8 +87,10 @@ public void saveReleaseContent(Collection releaseData, Map parseJson(String data) { + JsonDeserializer doiDeserializer = (json, type, context) -> Doi.fromString(json.getAsJsonPrimitive().getAsString()); Gson gson = new GsonBuilder() .setFieldNamingPolicy(FieldNamingPolicy.LOWER_CASE_WITH_UNDERSCORES) + .registerTypeAdapter(Doi.class, doiDeserializer) .create(); TypeToken> typeToken = new TypeToken<>() { }; diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/ReleaseData.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/ReleaseData.java index 45106375c..55807beb2 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/ReleaseData.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/ReleaseData.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -12,8 +12,8 @@ public class ReleaseData { public UUID softwareId; public String slug; - public String conceptDoi; - public Collection versionedDois; + public Doi conceptDoi; + public Collection versionedDois; @Override public String toString() { diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/RsdMentionIds.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/RsdMentionIds.java new file mode 100644 index 000000000..81d529eb5 --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/RsdMentionIds.java @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 
Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import java.util.UUID; + +public record RsdMentionIds( + UUID id, + Doi doi, + OpenalexId openalexId +) { +} diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/RsdMentionRecord.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/RsdMentionRecord.java new file mode 100644 index 000000000..237e96152 --- /dev/null +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/RsdMentionRecord.java @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import java.time.Instant; +import java.util.UUID; + +public record RsdMentionRecord( + UUID id, + ExternalMentionRecord content, + Instant scrapedAt +) { +} diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepositoryTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepositoryTest.java index f0119a486..8441b4702 100644 --- a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepositoryTest.java +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepositoryTest.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2022 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2022 Netherlands eScience Center +// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -15,10 +15,14 @@ public class DataciteMentionRepositoryTest { @Test public void givenCollectionOfStrings_whenJoining_thenCorrectStringReturned() { - Collection strings = List.of("abc", "def", "ghij"); + Doi doi1 = Doi.fromString("10.000/1"); + Doi doi2 = Doi.fromString("10.2/2"); + Doi doi3 = Doi.fromString("10.3/abc-def"); + Collection strings = List.of(doi1, doi2, doi3); - String joinedString = DataciteMentionRepository.joinCollection(strings); + String joinedString = DataciteMentionRepository.joinDoisForGraphqlQuery(strings); - Assertions.assertEquals("\"abc\",\"def\",\"ghij\"", joinedString); + String expected = "\"%s\",\"%s\",\"%s\"".formatted(doi1, doi2, doi3); + Assertions.assertEquals(expected, joinedString); } } diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/DoiTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/DoiTest.java new file mode 100644 index 000000000..58ff894e4 --- /dev/null +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/DoiTest.java @@ -0,0 +1,54 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +class DoiTest { + + @ParameterizedTest + @ValueSource(strings = { + "10.2533/chimia.2024.525", + "10.1017/9781108881425", + "10.3390/photonics11070630", + "10.1093/gigascience/giad048", + "10.1007/978-3-030-83508-8_2", + "10.22541/essoar.171500959.99365288/v1", + "10.1016/j.eswa.2023.120561", + }) + void givenValidDoi_whenInstanceCreated_thenNoExceptionThrown(String 
validDoi) { + Doi doi = Assertions.assertDoesNotThrow(() -> Doi.fromString(validDoi)); + Assertions.assertNotNull(doi); + } + + @ParameterizedTest + @ValueSource(strings = { + "10.2533", + "10.2533/", + "https://doi.org/10.2533/chimia.2024.525", + "10.3390/photonics 11070630", + "10.3390/photonics11070630 ", + "11.1016/j.eswa.2023.120561", + "", + }) + void givenInValidDoi_whenCreatingInstance_thenExceptionThrown(String invalidDoi) { + Assertions.assertThrows(RuntimeException.class, () -> Doi.fromString(invalidDoi)); + } + + @Test + void givenTwoValidDoisThatOnlyDifferInCase_whenComparing_thenTheyAreEqual() { + String upperCaseDoi = "10.2533/chimia.2024.525"; + String lowerCaseDoi = "10.2533/CHIMIA.2024.525"; + + Doi doi1 = Doi.fromString(upperCaseDoi); + Doi doi2 = Doi.fromString(lowerCaseDoi); + + Assertions.assertEquals(doi1, doi2); + } +} diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/MainMentionsTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/MainMentionsTest.java index d60dfc854..f9acd80cb 100644 --- a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/MainMentionsTest.java +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/MainMentionsTest.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2022 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2022 Netherlands eScience Center +// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -43,7 +43,7 @@ void givenValidDoiSourceData_whenParsing_thenMapReturned() { } ]"""; - Map doiToSource = MainMentions.parseJsonSources(validDoiSourceData); + Map doiToSource = MainMentions.parseJsonDoiSources(validDoiSourceData); Assertions.assertEquals(6, doiToSource.size()); Assertions.assertEquals("EIDR", doiToSource.get("10.5240/B1FA-0EEC-C316-3316-3A73-L")); Assertions.assertEquals("Invalid DOI", doiToSource.get("notADoi")); diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/OpenalexIdTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/OpenalexIdTest.java new file mode 100644 index 000000000..0b20e9a4d --- /dev/null +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/OpenalexIdTest.java @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center +// +// SPDX-License-Identifier: Apache-2.0 + +package nl.esciencecenter.rsd.scraper.doi; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +class OpenalexIdTest { + + @ParameterizedTest + @ValueSource(strings = { + "https://openalex.org/W3160330321", + "https://openalex.org/w3160330321", + "https://openalex.org/W152867311", + }) + void givenValidOpenalexId_whenInstanceCreated_thenNoExceptionThrown(String validId) { + OpenalexId openalexId = Assertions.assertDoesNotThrow(() -> OpenalexId.fromString(validId)); + Assertions.assertNotNull(openalexId); + } + + @ParameterizedTest + @ValueSource(strings = { + "http://openalex.org/W3160330321", + "https://openalex.org/3160330321", + "https://openalex.org/W3160330321/", + "https://openalex.org/works/W3160330321", + "W3160330321", + "", + }) + void givenInValidOpenalexId_whenCreatingInstance_thenExceptionThrown(String invalidId) { + Assertions.assertThrows(RuntimeException.class, () 
-> OpenalexId.fromString(invalidId)); + } + + @Test + void givenTwoValidIdsThatOnlyDifferInCase_whenComparing_thenTheyAreEqual() { + String upperCaseId = "https://openalex.org/W3160330321"; + String lowerCaseId = "https://openalex.org/w3160330321"; + + OpenalexId openalexId1 = OpenalexId.fromString(upperCaseId); + OpenalexId openalexId2 = OpenalexId.fromString(lowerCaseId); + + Assertions.assertEquals(openalexId1, openalexId2); + } +}
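The DoiTest and OpenalexIdTest cases above pin down how the two new identifier types behave. For completeness, a hedged usage sketch of Doi, inferred from those tests and from call sites such as doi.toUrlEncodedString() in PostgrestMentionRepository; Doi.java itself is added by this patch but is not part of this excerpt, so details may differ.

package nl.esciencecenter.rsd.scraper.doi;

class DoiUsageSketch {

	public static void main(String[] args) {
		// Equality ignores case, as asserted in DoiTest.
		Doi a = Doi.fromString("10.2533/chimia.2024.525");
		Doi b = Doi.fromString("10.2533/CHIMIA.2024.525");
		System.out.println(a.equals(b)); // true

		// Encoded form used in PostgREST filters and the OpenAlex DOI filter.
		System.out.println(a.toUrlEncodedString());

		// Resolver URLs and other malformed input fail fast with an exception.
		try {
			Doi.fromString("https://doi.org/10.2533/chimia.2024.525");
		} catch (RuntimeException e) {
			System.out.println("pass the bare DOI, not the resolver URL");
		}
	}
}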