diff --git a/.github/workflows/editorconfig-check.yml b/.github/workflows/editorconfig-check.yml new file mode 100644 index 000000000..29810f604 --- /dev/null +++ b/.github/workflows/editorconfig-check.yml @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +# SPDX-FileCopyrightText: 2024 Netherlands eScience Center +# +# SPDX-License-Identifier: Apache-2.0 + +name: EditorConfig Checker + +on: + pull_request: + +jobs: + editorconfig: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - uses: editorconfig-checker/action-editorconfig-checker@main + - run: editorconfig-checker diff --git a/authentication/.editorconfig b/authentication/.editorconfig new file mode 100644 index 000000000..f4b6b5f3f --- /dev/null +++ b/authentication/.editorconfig @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +# SPDX-FileCopyrightText: 2024 Netherlands eScience Center +# +# SPDX-License-Identifier: Apache-2.0 + +# EditorConfig is awesome: https://editorconfig.org + +# Unix-style newlines with a newline ending every file and tab indentation +[**] +end_of_line = lf +insert_final_newline = true +charset = utf-8 +trim_trailing_whitespace = true +indent_style = tab + +[**.md] +indent_style = unset diff --git a/authentication/README.md b/authentication/README.md index 86f667044..3324e8fd5 100644 --- a/authentication/README.md +++ b/authentication/README.md @@ -16,6 +16,10 @@ This module handles authentication from third parties using oAuth2 and OpenID. ## Environment variables Check `.env.example` to see which environment variables are needed. In particular, look for the env variable `RSD_ENVIRONMENT` to allow for easy admin creation when developing and testing out the RSD (this is not safe for production!). +## Contributing + +This module uses [EditorConfig](https://editorconfig.org/) for basic formatting. Please check if your editor [already supports EditorConfig](https://editorconfig.org/#pre-installed) or if you need to [install a plugin](https://editorconfig.org/#download). A GitHub workflow is run on every PR to check if any files violate the formatting settings. + ## Developing locally If you want to develop and run the auth module locally, i.e. outside of Docker, you have to make two changes to files tracked by Git. 1. In `docker-compose.yml`, add the following lines to the `nginx` service: diff --git a/authentication/pom.xml b/authentication/pom.xml index fd0c72ee8..ea84309e7 100644 --- a/authentication/pom.xml +++ b/authentication/pom.xml @@ -9,9 +9,11 @@ SPDX-FileCopyrightText: 2022 Matthias Rüster (GFZ) + + 4.0.0 nl.research-software diff --git a/authentication/src/test/java/nl/esciencecenter/rsd/authentication/UtilsTest.java b/authentication/src/test/java/nl/esciencecenter/rsd/authentication/UtilsTest.java index 0d89341a4..c196f7732 100644 --- a/authentication/src/test/java/nl/esciencecenter/rsd/authentication/UtilsTest.java +++ b/authentication/src/test/java/nl/esciencecenter/rsd/authentication/UtilsTest.java @@ -19,48 +19,50 @@ class UtilsTest { @Test void givenValidWellKnownData_whenExtractingTokenEndpoint_correctResultReturned() { + // editorconfig-checker-disable String data = """ - { - "token_endpoint_auth_signing_alg_values_supported": [ - "RS256" - ], - "id_token_signing_alg_values_supported": [ - "RS256" - ], - "userinfo_endpoint": "https://sandbox.orcid.org/oauth/userinfo", - "authorization_endpoint": "https://sandbox.orcid.org/oauth/authorize", - "token_endpoint": "https://sandbox.orcid.org/oauth/token", - "jwks_uri": "https://sandbox.orcid.org/oauth/jwks", - "claims_supported": [ - "family_name", - "given_name", - "name", - "auth_time", - "iss", - "sub" - ], - "scopes_supported": [ - "openid" - ], - "subject_types_supported": [ - "public" - ], - "response_types_supported": [ - "code", - "id_token", - "id_token token" - ], - "claims_parameter_supported": false, - "token_endpoint_auth_methods_supported": [ - "client_secret_post" - ], - "grant_types_supported": [ - "authorization_code", - "implicit", - "refresh_token" - ], - "issuer": "https://sandbox.orcid.org" - }"""; + { + "token_endpoint_auth_signing_alg_values_supported": [ + "RS256" + ], + "id_token_signing_alg_values_supported": [ + "RS256" + ], + "userinfo_endpoint": "https://sandbox.orcid.org/oauth/userinfo", + "authorization_endpoint": "https://sandbox.orcid.org/oauth/authorize", + "token_endpoint": "https://sandbox.orcid.org/oauth/token", + "jwks_uri": "https://sandbox.orcid.org/oauth/jwks", + "claims_supported": [ + "family_name", + "given_name", + "name", + "auth_time", + "iss", + "sub" + ], + "scopes_supported": [ + "openid" + ], + "subject_types_supported": [ + "public" + ], + "response_types_supported": [ + "code", + "id_token", + "id_token token" + ], + "claims_parameter_supported": false, + "token_endpoint_auth_methods_supported": [ + "client_secret_post" + ], + "grant_types_supported": [ + "authorization_code", + "implicit", + "refresh_token" + ], + "issuer": "https://sandbox.orcid.org" + }"""; + // editorconfig-checker-enable URI tokenEndpoint = Utils.extractTokenUrlFromWellKnownData(data); diff --git a/backend-tests/.editorconfig b/backend-tests/.editorconfig new file mode 100644 index 000000000..976368a2e --- /dev/null +++ b/backend-tests/.editorconfig @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +# SPDX-FileCopyrightText: 2024 Netherlands eScience Center +# +# SPDX-License-Identifier: Apache-2.0 + +# EditorConfig is awesome: https://editorconfig.org + +# Unix-style newlines with a newline ending every file and tab indentation +[**] +end_of_line = lf +insert_final_newline = true +charset = utf-8 +trim_trailing_whitespace = true +indent_style = tab + +[**.{md,yml}] +indent_style = unset diff --git a/backend-tests/README.md b/backend-tests/README.md index 49a4aad19..ac6ead9e0 100644 --- a/backend-tests/README.md +++ b/backend-tests/README.md @@ -7,7 +7,7 @@ SPDX-License-Identifier: CC-BY-4.0 # Backend tests -This folder contains backend tests for the RSD. It is intended to mainly: +This folder contains backend tests for the RSD. It is intended to mainly: - test the correctness of row level security rules - test the correctness of remote procedure calls (no tests yet) - load test the backend/database (no tests yet) @@ -24,6 +24,10 @@ Tests should be written taking the following principles in account: - each test should be runnable independently of other tests - each test should be repeatable, without e.g. having to clean up the database first +## Contributing + +This module uses [EditorConfig](https://editorconfig.org/) for basic formatting. Please check if your editor [already supports EditorConfig](https://editorconfig.org/#pre-installed) or if you need to [install a plugin](https://editorconfig.org/#download). A GitHub workflow is run on every PR to check if any files violate the formatting settings. + ## Writing tests Each class containing tests should be annotated with `@ExtendWith({SetupAllTests.class})`. This refers to a class containing a global setup method, which runs once to check the connection to the database and to initialise global parameters. diff --git a/backend-tests/pom.xml b/backend-tests/pom.xml index 7152b11c7..48bc1c36e 100644 --- a/backend-tests/pom.xml +++ b/backend-tests/pom.xml @@ -7,9 +7,11 @@ SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center SPDX-License-Identifier: Apache-2.0 --> + + 4.0.0 nl.research-software diff --git a/database/.editorconfig b/database/.editorconfig new file mode 100644 index 000000000..f4b6b5f3f --- /dev/null +++ b/database/.editorconfig @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +# SPDX-FileCopyrightText: 2024 Netherlands eScience Center +# +# SPDX-License-Identifier: Apache-2.0 + +# EditorConfig is awesome: https://editorconfig.org + +# Unix-style newlines with a newline ending every file and tab indentation +[**] +end_of_line = lf +insert_final_newline = true +charset = utf-8 +trim_trailing_whitespace = true +indent_style = tab + +[**.md] +indent_style = unset diff --git a/database/104-person-views.sql b/database/104-person-views.sql index a69414bc4..b7d96082f 100644 --- a/database/104-person-views.sql +++ b/database/104-person-views.sql @@ -2,6 +2,7 @@ -- SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center -- SPDX-FileCopyrightText: 2023 Dusan Mijatovic (dv4all) -- SPDX-FileCopyrightText: 2023 dv4all +-- SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) -- -- SPDX-License-Identifier: Apache-2.0 @@ -163,8 +164,8 @@ CREATE FUNCTION suggested_roles() RETURNS VARCHAR[] LANGUAGE sql STABLE AS $$ SELECT - ARRAY_AGG("role") - FROM ( + ARRAY_AGG("role") + FROM ( SELECT "role" FROM @@ -178,6 +179,5 @@ $$ team_member WHERE "role" IS NOT NULL - ) roles -; + ) roles; $$; diff --git a/scrapers/.editorconfig b/scrapers/.editorconfig new file mode 100644 index 000000000..76769f39a --- /dev/null +++ b/scrapers/.editorconfig @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) +# SPDX-FileCopyrightText: 2024 Netherlands eScience Center +# +# SPDX-License-Identifier: Apache-2.0 + +# EditorConfig is awesome: https://editorconfig.org + +# Unix-style newlines with a newline ending every file and tab indentation +[**] +end_of_line = lf +insert_final_newline = true +charset = utf-8 +trim_trailing_whitespace = true +indent_style = tab + +[**.md] +indent_style = unset + +[**.py] +indent_style = unset diff --git a/scrapers/README.md b/scrapers/README.md index f2bcc20de..b6d0171ab 100644 --- a/scrapers/README.md +++ b/scrapers/README.md @@ -11,6 +11,10 @@ SPDX-License-Identifier: CC-BY-4.0 This module contains the scrapers used by the RSD. The scrapers are written in Java using Maven. +## Contributing + +This module uses [EditorConfig](https://editorconfig.org/) for basic formatting. Please check if your editor [already supports EditorConfig](https://editorconfig.org/#pre-installed) or if you need to [install a plugin](https://editorconfig.org/#download). A GitHub workflow is run on every PR to check if any files violate the formatting settings. + ## Running from within Docker Compose If you have an instance of the RSD, including the scrapers, running with Docker Compose, you can manually run a scraper by running the respective command from the root of the project content: diff --git a/scrapers/pom.xml b/scrapers/pom.xml index 575261309..3af551acf 100644 --- a/scrapers/pom.xml +++ b/scrapers/pom.xml @@ -11,9 +11,11 @@ SPDX-FileCopyrightText: 2024 Jason Maassen (Netherlands eScience Center) + + 4.0.0 nl.research-software @@ -111,8 +113,8 @@ SPDX-License-Identifier: Apache-2.0 org.slf4j - slf4j-api - 2.0.13 + slf4j-api + 2.0.13 @@ -179,6 +181,5 @@ SPDX-License-Identifier: Apache-2.0 - diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/RsdResponseException.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/RsdResponseException.java index 0aa081ebb..cac9317b5 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/RsdResponseException.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/RsdResponseException.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center // SPDX-FileCopyrightText: 2022 Christian Meeßen (GFZ) // SPDX-FileCopyrightText: 2022 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences // @@ -10,9 +10,9 @@ import java.net.URI; public class RsdResponseException extends Exception { - + private static final long serialVersionUID = 1L; - + public final int statusCode; public final URI uri; public final String body; diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataCiteReleaseRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataCiteReleaseRepository.java index b50a46791..f21057230 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataCiteReleaseRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataCiteReleaseRepository.java @@ -23,21 +23,23 @@ public class DataCiteReleaseRepository { private static final Logger LOGGER = LoggerFactory.getLogger(DataCiteReleaseRepository.class); + // editorconfig-checker-disable private static final String QUERY_UNFORMATTED = """ - query { - works(ids: [%s], first: 10000) { - nodes { - doi - versionOfCount - relatedIdentifiers { - relationType - relatedIdentifierType - relatedIdentifier - } - } - } - } - """; + query { + works(ids: [%s], first: 10000) { + nodes { + doi + versionOfCount + relatedIdentifiers { + relationType + relatedIdentifierType + relatedIdentifier + } + } + } + } + """; + // editorconfig-checker-enable public Map> getVersionedDois(Collection conceptDois) { if (conceptDois.isEmpty()) { diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java index 5ff783a8c..d4d048a58 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/doi/DataciteMentionRepository.java @@ -30,39 +30,41 @@ public class DataciteMentionRepository { private static final Logger LOGGER = LoggerFactory.getLogger(DataciteMentionRepository.class); + // editorconfig-checker-disable private static final String QUERY_UNFORMATTED = """ - query { - works(ids: [%s], first: 10000) { - nodes { - doi - types { - resourceType - resourceTypeGeneral - } - version - relatedIdentifiers { - relatedIdentifier - relatedIdentifierType - } - titles(first: 1) { - title - } - publisher { - name - } - publicationYear - registered - creators { - givenName - familyName - } - contributors { - givenName - familyName - } - } - } - }"""; + query { + works(ids: [%s], first: 10000) { + nodes { + doi + types { + resourceType + resourceTypeGeneral + } + version + relatedIdentifiers { + relatedIdentifier + relatedIdentifierType + } + titles(first: 1) { + title + } + publisher { + name + } + publicationYear + registered + creators { + givenName + familyName + } + contributors { + givenName + familyName + } + } + } + }"""; + // editorconfig-checker-enable private static final Map dataciteTypeMap; private static final Map dataciteTextTypeMap; @@ -112,8 +114,8 @@ public class DataciteMentionRepository { // "10.5281/zenodo.1408128","10.1186/s12859-018-2165-7" static String joinDoisForGraphqlQuery(Collection dois) { return dois.stream() - .map(Doi::toString) - .collect(Collectors.joining("\",\"", "\"", "\"")); + .map(Doi::toString) + .collect(Collectors.joining("\",\"", "\"", "\"")); } static Collection jsonStringToUniqueMentions(String json) { @@ -189,19 +191,19 @@ static ExternalMentionRecord parseWork(JsonObject work) { } return new ExternalMentionRecord( - doi, - doiRegistrationDate, - null, - url, - title, - authors, - publisher, - publicationYear, - null, - null, - mentionType, - "DataCite", - version + doi, + doiRegistrationDate, + null, + url, + title, + authors, + publisher, + publicationYear, + null, + null, + mentionType, + "DataCite", + version ); } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/CommitData.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/CommitData.java index e2080e934..dbe03ab83 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/CommitData.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/CommitData.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -8,8 +8,8 @@ import java.time.ZonedDateTime; public record CommitData( - BasicRepositoryData basicData, - CommitsPerWeek commitHistory, - ZonedDateTime commitHistoryScrapedAt + BasicRepositoryData basicData, + CommitsPerWeek commitHistory, + ZonedDateTime commitHistoryScrapedAt ) { } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainBasicData.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainBasicData.java index 643ba7877..c2c3be557 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainBasicData.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainBasicData.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -9,6 +9,8 @@ import nl.esciencecenter.rsd.scraper.RsdRateLimitException; import nl.esciencecenter.rsd.scraper.RsdResponseException; import nl.esciencecenter.rsd.scraper.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.net.URI; import java.time.ZonedDateTime; @@ -16,23 +18,20 @@ import java.util.Optional; import java.util.concurrent.CompletableFuture; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - public class MainBasicData { private static final Logger LOGGER = LoggerFactory.getLogger(MainBasicData.class); - + public static void main(String[] args) { LOGGER.info("Start scraping basic Git data"); - + long t1 = System.currentTimeMillis(); - + scrapeGitHub(); scrapeGitLab(); - + long time = System.currentTimeMillis() - t1; - + LOGGER.info("Done scraping basic Git data ({} ms.)", time); } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainCommits.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainCommits.java index 3b1fbf678..142f125c1 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainCommits.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainCommits.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center // SPDX-FileCopyrightText: 2022 Christian Meeßen (GFZ) // SPDX-FileCopyrightText: 2022 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences // @@ -11,6 +11,8 @@ import nl.esciencecenter.rsd.scraper.RsdRateLimitException; import nl.esciencecenter.rsd.scraper.RsdResponseException; import nl.esciencecenter.rsd.scraper.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.net.URI; import java.time.ZonedDateTime; @@ -18,25 +20,21 @@ import java.util.Optional; import java.util.concurrent.CompletableFuture; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - public class MainCommits { private static final Logger LOGGER = LoggerFactory.getLogger(MainCommits.class); - + public static void main(String[] args) { - + LOGGER.info("Start scraping commits"); - + long t1 = System.currentTimeMillis(); - + scrapeGitHub(); scrapeGitLab(); long time = System.currentTimeMillis() - t1; - + LOGGER.info("Done scraping commits ({} ms.)", time); } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainContributors.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainContributors.java index 31395ebb7..007800efd 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainContributors.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainContributors.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2023 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2023 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -9,6 +9,8 @@ import nl.esciencecenter.rsd.scraper.RsdRateLimitException; import nl.esciencecenter.rsd.scraper.RsdResponseException; import nl.esciencecenter.rsd.scraper.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.net.URI; import java.time.ZonedDateTime; @@ -16,23 +18,20 @@ import java.util.Optional; import java.util.concurrent.CompletableFuture; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - public class MainContributors { private static final Logger LOGGER = LoggerFactory.getLogger(MainContributors.class); - + public static void main(String[] args) { LOGGER.info("Start scraping contributors"); - + long t1 = System.currentTimeMillis(); - + scrapeGitHub(); scrapeGitLab(); - + long time = System.currentTimeMillis() - t1; - + LOGGER.info("Done scraping contributors ({} ms.)", time); } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainProgrammingLanguages.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainProgrammingLanguages.java index 59502554f..e42fecdd6 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainProgrammingLanguages.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/git/MainProgrammingLanguages.java @@ -1,5 +1,5 @@ -// SPDX-FileCopyrightText: 2022 - 2023 Ewan Cahen (Netherlands eScience Center) -// SPDX-FileCopyrightText: 2022 - 2023 Netherlands eScience Center +// SPDX-FileCopyrightText: 2022 - 2024 Ewan Cahen (Netherlands eScience Center) +// SPDX-FileCopyrightText: 2022 - 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -9,6 +9,8 @@ import nl.esciencecenter.rsd.scraper.RsdRateLimitException; import nl.esciencecenter.rsd.scraper.RsdResponseException; import nl.esciencecenter.rsd.scraper.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.net.URI; import java.time.ZonedDateTime; @@ -16,24 +18,21 @@ import java.util.Optional; import java.util.concurrent.CompletableFuture; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - public class MainProgrammingLanguages { private static final Logger LOGGER = LoggerFactory.getLogger(MainProgrammingLanguages.class); - + public static void main(String[] args) { - + LOGGER.info("Start scraping programming languages"); - + long t1 = System.currentTimeMillis(); - + scrapeGithub(); scrapeGitLab(); - + long time = System.currentTimeMillis() - t1; - + LOGGER.info("Done scraping programming languages ({} ms.)", time); } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationData.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationData.java index 778a98453..ad75c7116 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationData.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/BasicOrganisationData.java @@ -1,5 +1,7 @@ // SPDX-FileCopyrightText: 2024 Christian Meeßen (GFZ) +// SPDX-FileCopyrightText: 2024 Ewan Cahen (Netherlands eScience Center) // SPDX-FileCopyrightText: 2024 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences +// SPDX-FileCopyrightText: 2024 Netherlands eScience Center // // SPDX-License-Identifier: Apache-2.0 @@ -7,5 +9,5 @@ import java.util.UUID; -public record BasicOrganisationData(UUID id, String rorId, String country, String city) { +public record BasicOrganisationData(UUID id, String rorId, String country, String city) { } diff --git a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/MainRor.java b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/MainRor.java index b89893399..f46471d12 100644 --- a/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/MainRor.java +++ b/scrapers/src/main/java/nl/esciencecenter/rsd/scraper/ror/MainRor.java @@ -22,7 +22,7 @@ public class MainRor { private static final Logger LOGGER = LoggerFactory.getLogger(MainRor.class); private static final int SCRAPING_LIMIT = Config.maxRequestsRor(); - + public static void main(String[] args) { LOGGER.info("Start scraping ROR data."); long t1 = System.currentTimeMillis(); @@ -34,7 +34,7 @@ public static void main(String[] args) { private static void scrapeLocationData() { RorPostgrestConnector organisationsInRSD = new RorPostgrestConnector(); Collection organisationsToScrape = organisationsInRSD.organisationsWithoutLocation(SCRAPING_LIMIT); - CompletableFuture [] futures = new CompletableFuture[organisationsToScrape.size()]; + CompletableFuture[] futures = new CompletableFuture[organisationsToScrape.size()]; ZonedDateTime scrapedAt = ZonedDateTime.now(); int i = 0; String tableName = "organisation"; diff --git a/scrapers/src/main/resources/logback.xml b/scrapers/src/main/resources/logback.xml index 0439cf1bf..a94f68322 100644 --- a/scrapers/src/main/resources/logback.xml +++ b/scrapers/src/main/resources/logback.xml @@ -1,17 +1,25 @@ + + + - - + + - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} -%kvp- %msg%n - - + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} -%kvp- %msg%n + + - - - + + + diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/MainMentionsTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/MainMentionsTest.java index f9acd80cb..ac8c116df 100644 --- a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/MainMentionsTest.java +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/doi/MainMentionsTest.java @@ -15,33 +15,35 @@ public class MainMentionsTest { @Test void givenValidDoiSourceData_whenParsing_thenMapReturned() { + // editorconfig-checker-disable String validDoiSourceData = """ - [ - { - "DOI": "10.5240/B1FA-0EEC-C316-3316-3A73-L", - "RA": "EIDR" - }, - { - "DOI": "notADoi", - "status": "Invalid DOI" - }, - { - "DOI": "10.5281/zenodo.1436372", - "RA": "DataCite" - }, - { - "DOI": "10.5281/zenodo.2633819", - "RA": "DataCite" - }, - { - "DOI": "10.5281/zenodo.5825192", - "RA": "DataCite" - }, - { - "DOI": "10.35802/218300", - "RA": "Crossref" - } - ]"""; + [ + { + "DOI": "10.5240/B1FA-0EEC-C316-3316-3A73-L", + "RA": "EIDR" + }, + { + "DOI": "notADoi", + "status": "Invalid DOI" + }, + { + "DOI": "10.5281/zenodo.1436372", + "RA": "DataCite" + }, + { + "DOI": "10.5281/zenodo.2633819", + "RA": "DataCite" + }, + { + "DOI": "10.5281/zenodo.5825192", + "RA": "DataCite" + }, + { + "DOI": "10.35802/218300", + "RA": "Crossref" + } + ]"""; + // editorconfig-checker-enable Map doiToSource = MainMentions.parseJsonDoiSources(validDoiSourceData); Assertions.assertEquals(6, doiToSource.size()); diff --git a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorScraperTest.java b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorScraperTest.java index 27df6aa8e..4abb428a6 100644 --- a/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorScraperTest.java +++ b/scrapers/src/test/java/nl/esciencecenter/rsd/scraper/ror/RorScraperTest.java @@ -22,7 +22,7 @@ @WireMockTest(proxyMode = true) class RorScraperTest { - + private static RorScraper rorScraper; private static String completeJsonResponse = "{\"id\":\"https://ror.org/04z8jg394\",\"name\":\"Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences\",\"email_address\":\"\",\"ip_addresses\":[],\"established\":1992,\"types\":[\"Facility\"],\"relationships\":[{\"label\":\"Helmholtz Association of German Research Centres\",\"type\":\"Parent\",\"id\":\"https://ror.org/0281dp749\"}],\"addresses\":[{\"lat\":52.39886,\"lng\":13.06566,\"state\":null,\"state_code\":null,\"city\":\"Potsdam\",\"geonames_city\":{\"id\":2852458,\"city\":\"Potsdam\",\"geonames_admin1\":{\"name\":\"Brandenburg\",\"id\":2945356,\"ascii_name\":\"Brandenburg\",\"code\":\"DE.11\"},\"geonames_admin2\":{\"name\":null,\"id\":null,\"ascii_name\":null,\"code\":\"DE.11.00\"},\"license\":{\"attribution\":\"Data from geonames.org under a CC-BY 3.0 license\",\"license\":\"http://creativecommons.org/licenses/by/3.0/\"},\"nuts_level1\":{\"name\":null,\"code\":null},\"nuts_level2\":{\"name\":null,\"code\":null},\"nuts_level3\":{\"name\":null,\"code\":null}},\"postcode\":null,\"primary\":false,\"line\":null,\"country_geonames_id\":2921044}],\"links\":[\"https://www.gfz-potsdam.de\"],\"aliases\":[],\"acronyms\":[\"GFZ\"],\"status\":\"active\",\"wikipedia_url\":\"https://en.wikipedia.org/wiki/GFZ_German_Research_Centre_for_Geosciences\",\"labels\":[{\"label\":\"Helmholtz-Zentrum Potsdam - Deutsches GeoForschungsZentrum GFZ\",\"iso639\":\"de\"}],\"country\":{\"country_name\":\"Germany\",\"country_code\":\"DE\"},\"external_ids\":{\"ISNI\":{\"preferred\":null,\"all\":[\"0000 0000 9195 2461\"]},\"FundRef\":{\"preferred\":\"501100010956\",\"all\":[\"501100010956\"]},\"Wikidata\":{\"preferred\":null,\"all\":[\"Q1205654\"]},\"GRID\":{\"preferred\":\"grid.23731.34\",\"all\":\"grid.23731.34\"}}}"; private static String apiDomain = "api.ror.org"; @@ -33,36 +33,36 @@ void testLocations() throws Exception { stubFor( get(apiPath) - .withHost(WireMock.equalTo(apiDomain)) - .willReturn( - aResponse() - .withStatus(200) - .withBody(completeJsonResponse) - )); - + .withHost(WireMock.equalTo(apiDomain)) + .willReturn( + aResponse() + .withStatus(200) + .withBody(completeJsonResponse) + )); + rorScraper = new RorScraper("http://" + apiDomain + apiPath); - + assertEquals("Potsdam", rorScraper.city()); assertEquals("Germany", rorScraper.country()); } -@ParameterizedTest -@ValueSource(strings = { + @ParameterizedTest + @ValueSource(strings = { "{\"addresses\": [{\"city\": null}], \"country\": {\"country_name\": null}}", "{\"addresses\": [],\"country\": {}}", "{}", -}) + }) void testNullLocationsOrEmptyLocationOrEmptyResponse(String jsonBody) throws Exception { stubFor( get(apiPath) - .withHost(WireMock.equalTo(apiDomain)) - .willReturn( - aResponse() - .withStatus(200) - .withBody(jsonBody) - )); - + .withHost(WireMock.equalTo(apiDomain)) + .willReturn( + aResponse() + .withStatus(200) + .withBody(jsonBody) + )); + rorScraper = new RorScraper("http://" + apiDomain + apiPath); assertNull(rorScraper.city());