From 2e157f0cbbe253206fb75371c7c0831f818e7e55 Mon Sep 17 00:00:00 2001
From: j3nsch
Date: Wed, 22 Nov 2023 09:04:18 +0100
Subject: [PATCH] #338 Functions for finding (duplicate) DOIs

---
 library/Opus/Doi/DoiManager.php   | 49 ++++++++++++++++
 tests/Opus/Doi/DoiManagerTest.php | 94 +++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)

diff --git a/library/Opus/Doi/DoiManager.php b/library/Opus/Doi/DoiManager.php
index 56a05fba..1320c145 100644
--- a/library/Opus/Doi/DoiManager.php
+++ b/library/Opus/Doi/DoiManager.php
@@ -42,6 +42,8 @@
 use Opus\Common\Log;
 use Opus\Common\Log\LogService;
 use Opus\Common\Model\NotFoundException;
+use Opus\Db\DocumentIdentifiers;
+use Opus\Db\TableGateway;
 use Opus\Document;
 use Opus\DocumentFinder;
 use Opus\Doi\Generator\DoiGeneratorException;
@@ -851,4 +853,51 @@ public function storeRegistrationXml($doc, $xml)
 
         file_put_contents($filePath, $xml);
     }
+
+    /**
+     * Returns every distinct DOI value found in the identifiers table.
+     *
+     * The result is sorted by value so callers get a stable order; without
+     * an ORDER BY the database may return the rows in any order.
+     *
+     * @return string[]
+     */
+    public function getAllDoiValues()
+    {
+        $table = TableGateway::getInstance(DocumentIdentifiers::class);
+
+        $database = $table->getAdapter();
+
+        $select = $table->select()
+            ->from('document_identifiers', 'value')
+            ->distinct(true)
+            ->where('type = ?', 'doi')
+            ->order('value');
+
+        return $database->fetchCol($select);
+    }
+
+    /**
+     * Returns DOI values that occur more than once, sorted by value.
+     *
+     * NOTE: rows are counted per value regardless of the owning document, so
+     * a DOI stored twice on a single document is reported here as well.
+     *
+     * @return string[]
+     */
+    public function getDuplicateDoiValues()
+    {
+        $table = TableGateway::getInstance(DocumentIdentifiers::class);
+
+        $database = $table->getAdapter();
+
+        $select = $table->select()
+            ->from('document_identifiers', 'value')
+            ->where('type = ?', 'doi')
+            ->group('value')
+            ->having('count(value) > 1')
+            ->order('value');
+
+        return $database->fetchCol($select);
+    }
 }
diff --git a/tests/Opus/Doi/DoiManagerTest.php b/tests/Opus/Doi/DoiManagerTest.php
index d04f05ea..3ca5a5b0 100644
--- a/tests/Opus/Doi/DoiManagerTest.php
+++ b/tests/Opus/Doi/DoiManagerTest.php
@@ -908,4 +908,98 @@ public function testGetLandingPageUrlOfDocForId()
             $manager->getLandingPageUrlOfDoc($docId)
         );
     }
+
+    public function testGetAllDoiValues()
+    {
+        $doiValues = [
+            '10.0000/1111',
+            '10.0000/2222',
+            '10.0000/3333',
+        ];
+
+        foreach ($doiValues as $doi) {
+            $doc = Document::new();
+            $identifier = $doc->addIdentifier();
+            $identifier->setType('doi');
+            $identifier->setValue($doi);
+            $doc->store();
+        }
+
+        // other identifier type than DOI
+        $identifier = $doc->addIdentifier();
+        $identifier->setType('isbn');
+        $identifier->setValue('isbn-value');
+
+        // duplicate DOI identifier
+        $identifier = $doc->addIdentifier();
+        $identifier->setType('doi');
+        $identifier->setValue('10.0000/3333');
+
+        $doc->store();
+
+        $manager = new DoiManager();
+
+        $values = $manager->getAllDoiValues();
+
+        $this->assertCount(3, $values);
+        $this->assertEquals($doiValues, $values);
+    }
+
+    public function testGetAllDoiValuesNonFound()
+    {
+        $manager = new DoiManager();
+
+        $values = $manager->getAllDoiValues();
+
+        $this->assertIsArray($values);
+        $this->assertEmpty($values);
+    }
+
+    public function testGetDuplicateDoiValues()
+    {
+        $doi1 = '10.1000/1111';
+        $doi2 = '10.1000/2222';
+
+        $doc = Document::new();
+        $doi = $doc->addIdentifier();
+        $doi->setType('doi');
+        $doi->setValue($doi1);
+        $doc->store();
+
+        $doc = Document::new();
+        $doi = $doc->addIdentifier();
+        $doi->setType('doi');
+        $doi->setValue($doi1);
+        $doc->store();
+
+        $doc = Document::new();
+        $doi = $doc->addIdentifier();
+        $doi->setType('doi');
+        $doi->setValue($doi2);
+        $doc->store();
+
+        // other identifier type than DOI
+        $doc = Document::new();
+        $identifier = $doc->addIdentifier();
+        $identifier->setType('isbn');
+        $identifier->setValue('isbn-value');
+        $doc->store();
+
+        $manager = new DoiManager();
+
+        $values = $manager->getDuplicateDoiValues();
+
+        $this->assertCount(1, $values);
+        $this->assertEquals($doi1, $values[0]);
+    }
+
+    public function testGetDuplicateDoiValuesNonFound()
+    {
+        $manager = new DoiManager();
+
+        $values = $manager->getDuplicateDoiValues();
+
+        $this->assertIsArray($values);
+        $this->assertEmpty($values);
+    }
 }