From 0d4a3ea792956866738a56beccb4237ef8314bd7 Mon Sep 17 00:00:00 2001
From: "c.dumitru@orcid.org"
Date: Tue, 13 Jun 2023 11:49:10 +0100
Subject: [PATCH 1/2] Added the methods to retrieve by source type

---
 .../org/orcid/persistence/dao/OrgDisambiguatedDao.java | 10 ++++++++++
 .../persistence/dao/impl/OrgDisambiguatedDaoImpl.java  | 14 ++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/dao/OrgDisambiguatedDao.java b/orcid-persistence/src/main/java/org/orcid/persistence/dao/OrgDisambiguatedDao.java
index ba8ddbb903e..26180f993dc 100644
--- a/orcid-persistence/src/main/java/org/orcid/persistence/dao/OrgDisambiguatedDao.java
+++ b/orcid-persistence/src/main/java/org/orcid/persistence/dao/OrgDisambiguatedDao.java
@@ -16,6 +16,16 @@ public interface OrgDisambiguatedDao extends GenericDao
     List<OrgDisambiguatedEntity> getOrgs(String searchTerm, int firstResult, int maxResults);
 
     List<OrgDisambiguatedEntity> getChunk(int firstResult, int maxResults);
+
+    /**
+     * Retrieves one page of disambiguated organizations with the given source type.
+     *
+     * @param sourceType  value matched against the entity's {@code sourceType}
+     * @param firstResult zero-based offset of the first row to return
+     * @param maxResults  maximum number of rows to return
+     * @return the entities of the requested page; empty when no row matches at that offset
+     */
+    List<OrgDisambiguatedEntity> findBySourceType(String sourceType, int firstResult, int maxResults);
 
     OrgDisambiguatedEntity findBySourceIdAndSourceType(String sourceId, String sourceType);
 
diff --git a/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/OrgDisambiguatedDaoImpl.java b/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/OrgDisambiguatedDaoImpl.java
index bf1a640d663..0eaa844a266 100644
--- a/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/OrgDisambiguatedDaoImpl.java
+++ b/orcid-persistence/src/main/java/org/orcid/persistence/dao/impl/OrgDisambiguatedDaoImpl.java
@@ -39,6 +39,20 @@ public OrgDisambiguatedEntity findBySourceIdAndSourceType(String sourceId, Strin
         List<OrgDisambiguatedEntity> results = query.getResultList();
         return results.isEmpty() ? null : results.get(0);
     }
+
+    /**
+     * Returns one page of entities whose {@code sourceType} equals the given value.
+     * Rows are ordered by id so that consecutive pages neither skip nor repeat entities.
+     */
+    @Override
+    public List<OrgDisambiguatedEntity> findBySourceType(String sourceType, int firstResult, int maxResults) {
+        TypedQuery<OrgDisambiguatedEntity> query = entityManager.createQuery(
+                "from OrgDisambiguatedEntity where sourceType = :sourceType order by id", OrgDisambiguatedEntity.class);
+        query.setParameter("sourceType", sourceType);
+        query.setFirstResult(firstResult);
+        query.setMaxResults(maxResults);
+        return query.getResultList();
+    }
 
     @Override
     public List<OrgDisambiguatedEntity> getChunk(int firstResult, int maxResults) {

From f8b7490f8d80ddb5c1b1e35abae47f658fda9f7d Mon Sep 17 00:00:00 2001
From: "c.dumitru@orcid.org"
Date: Tue, 13 Jun 2023 13:13:16 +0100
Subject: [PATCH 2/2] Implemented ringgold to csv cli logic

---
 .../source/cli/RingoldToRorCSVMapping.java    | 155 ++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/cli/RingoldToRorCSVMapping.java

diff --git a/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/cli/RingoldToRorCSVMapping.java b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/cli/RingoldToRorCSVMapping.java
new file mode 100644
index 00000000000..86a98372357
--- /dev/null
+++ b/orcid-scheduler-web/src/main/java/org/orcid/scheduler/loader/source/cli/RingoldToRorCSVMapping.java
@@ -0,0 +1,155 @@
+package org.orcid.scheduler.loader.source.cli;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.annotation.PostConstruct;
+
+import org.apache.commons.lang.StringUtils;
+
+import org.orcid.core.manager.OrgDisambiguatedManager;
+import org.orcid.core.orgs.OrgDisambiguatedSourceType;
+import org.orcid.core.utils.FunderIdentifierType;
+import org.orcid.persistence.constants.OrganizationStatus;
+import org.orcid.persistence.dao.OrgDisambiguatedDao;
+import org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity;
+import org.orcid.persistence.jpa.entities.OrgDisambiguatedExternalIdentifierEntity;
+import org.orcid.pojo.OrgDisambiguated;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+
+/**
+ * Command line tool that exports a CSV file mapping Ringgold source ids to ROR ids.
+ * <p>
+ * Run through {@link #main(String[])}: it loads {@code orcid-scheduler-context.xml}, pages through the
+ * disambiguated organizations whose source type is RINGGOLD and writes one {@code ringgold,ror} row per mapping.
+ */
+public class RingoldToRorCSVMapping {
+    private static final Logger LOGGER = LoggerFactory.getLogger(RingoldToRorCSVMapping.class);
+
+    private static final String RINGGOLD_TYPE = "RINGGOLD";
+
+    private static final int INDEXING_CHUNK_SIZE = 10000;
+
+    /** Used when the configured path was not resolved; mirrors the default in the {@code @Value} expression. */
+    private static final String DEFAULT_CSV_FILE_PATH = "/tmp/ringgoldtoror.csv";
+
+    private OrgDisambiguatedDao orgDisambiguatedDao;
+    private OrgDisambiguatedManager orgDisambiguatedManager;
+
+    @Value("${org.orcid.core.orgs.ringgoldtororcsv:/tmp/ringgoldtoror.csv}")
+    private String csvFilePath;
+
+    /**
+     * Loads the Spring context, looks up the DAO and manager beans and asks the context to populate
+     * the annotated fields of this instance.
+     */
+    @SuppressWarnings({ "resource" })
+    @PostConstruct
+    private void init() {
+        ApplicationContext context = new ClassPathXmlApplicationContext("orcid-scheduler-context.xml");
+        orgDisambiguatedDao = (OrgDisambiguatedDao) context.getBean("orgDisambiguatedDao");
+        orgDisambiguatedManager = (OrgDisambiguatedManager) context.getBean("orgDisambiguatedManager");
+        // main() creates this object with new, so the container never processed @Value on csvFilePath.
+        context.getAutowireCapableBeanFactory().autowireBean(this);
+    }
+
+    /**
+     * Entry point: builds the mapping and terminates the JVM, with a non-zero status when an exception escaped.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        RingoldToRorCSVMapping mappingData = new RingoldToRorCSVMapping();
+        int exitCode = 0;
+        try {
+            mappingData.init();
+            mappingData.getCSVMapping();
+        } catch (Exception e) {
+            LOGGER.error("Exception when generating csv mapping Ringgolds to Rors", e);
+            System.err.println(e.getMessage());
+            exitCode = 1;
+        } finally {
+            System.exit(exitCode);
+        }
+    }
+
+    /**
+     * Pages through every RINGGOLD organization, collects the ROR id of those whose status is
+     * PART_OF_GROUP and hands the result to {@link #generateCsv(HashMap)}.
+     */
+    public void getCSVMapping() {
+        LOGGER.info("About to start ringgold to ror csv mapping");
+        // Insertion-ordered so the header entry stays on the first CSV row.
+        HashMap<String, String> ringgoldMap = new LinkedHashMap<>();
+        ringgoldMap.put("Ringgold", "ROR");
+        int startIndex = 0;
+        List<OrgDisambiguatedEntity> entities;
+        do {
+            entities = orgDisambiguatedDao.findBySourceType(RINGGOLD_TYPE, startIndex, INDEXING_CHUNK_SIZE);
+            LOGGER.info("Found chunk of {} disambiguated orgs for ringgold to ror mapping", entities.size());
+            for (OrgDisambiguatedEntity entity : entities) {
+                if (StringUtils.equals(entity.getStatus(), OrganizationStatus.PART_OF_GROUP.name())) {
+                    addRorMappings(entity, ringgoldMap);
+                    // TBD check if there is any ROR that has the ringgold as external identifier??
+                }
+            }
+            startIndex += INDEXING_CHUNK_SIZE;
+        } while (!entities.isEmpty());
+
+        generateCsv(ringgoldMap);
+    }
+
+    /**
+     * Adds the ROR ids reachable from one Ringgold entity: either a ROR external identifier of the
+     * entity itself, or ROR organizations that share one of its ISNI identifiers.
+     */
+    private void addRorMappings(OrgDisambiguatedEntity entity, Map<String, String> ringgoldMap) {
+        if (entity.getExternalIdentifiers() == null) {
+            return;
+        }
+        for (OrgDisambiguatedExternalIdentifierEntity externalIdentifier : entity.getExternalIdentifiers()) {
+            String identifierType = externalIdentifier.getIdentifierType();
+            if (StringUtils.equals(identifierType, FunderIdentifierType.ISNI.value())) {
+                List<OrgDisambiguated> orgsFromExternalIdentifier = orgDisambiguatedManager
+                        .findOrgDisambiguatedIdsForSameExternalIdentifier(externalIdentifier.getIdentifier(), FunderIdentifierType.ISNI.value());
+                if (orgsFromExternalIdentifier != null) {
+                    for (OrgDisambiguated org : orgsFromExternalIdentifier) {
+                        // Null-safe: a candidate without a source type is simply skipped.
+                        if (StringUtils.equals(org.getSourceType(), OrgDisambiguatedSourceType.ROR.name())) {
+                            ringgoldMap.put(entity.getSourceId(), org.getSourceId());
+                        }
+                    }
+                }
+            } else if (StringUtils.equals(identifierType, OrgDisambiguatedSourceType.ROR.name())) {
+                ringgoldMap.put(entity.getSourceId(), externalIdentifier.getIdentifier());
+            }
+        }
+    }
+
+    /**
+     * Writes one {@code key,value} line per map entry to the configured CSV path.
+     * An I/O failure is logged and not rethrown.
+     *
+     * @param ringgoldMap rows to write, in the map's iteration order
+     */
+    public void generateCsv(HashMap<String, String> ringgoldMap) {
+        String targetPath = StringUtils.isBlank(csvFilePath) ? DEFAULT_CSV_FILE_PATH : csvFilePath;
+        String eol = System.lineSeparator();
+        try (Writer writer = new FileWriter(targetPath)) {
+            for (Map.Entry<String, String> entry : ringgoldMap.entrySet()) {
+                writer.append(entry.getKey()).append(',').append(entry.getValue()).append(eol);
+            }
+        } catch (IOException ex) {
+            LOGGER.error("Cannot write the ringgold to ror csv {}", targetPath, ex);
+        }
+    }
+}