-
Notifications
You must be signed in to change notification settings - Fork 1
/
swapModsNamesAndUpdateDC.php
110 lines (83 loc) · 3.87 KB
/
swapModsNamesAndUpdateDC.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env drush
#<?php
# Maintenance script: for every object that is a member of
# islandora:sp_pdf_collection, swap the given/family namePart values of each
# <mods:name type="personal"> in the MODS datastream, then regenerate the
# object's Dublin Core from the updated MODS.
#
# NOTE: the swap is not idempotent - running the script a second time swaps
# the names back again.

# include all php files necessary for Tuque
$tuqueFiles = glob("/var/www/drupal/htdocs/sites/all/libraries/tuque/*.php");
if ($tuqueFiles === FALSE) {
  // glob() returns FALSE on error; iterate over nothing rather than warn.
  $tuqueFiles = array();
}
foreach ($tuqueFiles as $filename) {
  require_once($filename);
}

# repository connection parameters
$url = 'localhost:8080/fedora';
$username = 'fedoraAdmin';
$password = 'fedoraAdmin';

# set up connection and repository variables
$connection = new RepositoryConnection($url, $username, $password);
$api = new FedoraApi($connection);
$repository = new FedoraRepository($api, new SimpleCache());

# query to grab all pdf collection objects from the repository
$sparqlQuery = "SELECT ?s
FROM <#ri>
WHERE {
?s <info:fedora/fedora-system:def/relations-external#isMemberOfCollection>
<info:fedora/islandora:sp_pdf_collection> .
}";

# run query
drush_print("\n*****Querying repository for all PDF objects...");
$allPDFObjects = $repository->ri->sparqlQuery($sparqlQuery);
drush_print("\n*****Query complete*****\n");

// main loop for ALL PDF OBJECTS in the collection
$totalNumObjects = count($allPDFObjects);
drush_print("There are $totalNumObjects objects to be processed");
drush_print("\n******Beginning main processing loop*****\n");

$realCount = 0;
foreach ($allPDFObjects as $theObject) {
  $realCount++;
  drush_print("Processing record $realCount of $totalNumObjects");
  $objectPID = $theObject['s']['value'];

  # try to fetch PID from repo
  try {
    $object = $repository->getObject($objectPID);
  }
  catch (Exception $e) {
    drush_print("\n\n**********####### ERROR #######*********");
    drush_print("***Could not get object $objectPID from repo***\n\n");
    continue;
  }

  // Everything below talks to the repository; a failure on one object is
  // reported and skipped so the rest of the batch still gets processed.
  try {
    # grab the MODS data stream
    $modsDS = $object['MODS'];
    if (!$modsDS) {
      // Objects without a MODS datastream cannot be processed.
      drush_print("\n\n**********####### ERROR #######*********");
      drush_print("***Object $objectPID has no MODS datastream, skipping***\n\n");
      continue;
    }

    /****************MODS RECORD**********************/
    $modsXML = $modsDS->content;
    $modsDOMDoc = new DOMDocument();
    // Guard against empty content (loadXML rejects an empty string) and
    // against XML that fails to parse.
    if (!is_string($modsXML) || trim($modsXML) === '' || !$modsDOMDoc->loadXML($modsXML)) {
      drush_print("\n\n**********####### ERROR #######*********");
      drush_print("***Could not parse MODS of $objectPID, skipping***\n\n");
      continue;
    }
    $modsXPath = new DOMXPath($modsDOMDoc);
    $modsXPath->registerNamespace('mods', 'http://www.loc.gov/mods/v3');

    // loop through all <name type="personal"> entries looking for authors
    foreach ($modsXPath->query('//mods:name[@type="personal"]') as $node) {
      $givenNode = $modsXPath->query('mods:namePart[@type="given"]', $node)->item(0);
      $familyNode = $modsXPath->query('mods:namePart[@type="family"]', $node)->item(0);
      // A swap needs both parts; leave names with only one of them untouched.
      if ($givenNode === NULL || $familyNode === NULL) {
        continue;
      }

      // get original values
      $originalGivenName = trim($givenNode->nodeValue);
      $originalFamilyName = trim($familyNode->nodeValue);

      // swap values. Text nodes are used instead of assigning nodeValue
      // because nodeValue parses entity references, which breaks on names
      // that contain a literal "&".
      $replacements = array(
        array($givenNode, $originalFamilyName),
        array($familyNode, $originalGivenName),
      );
      foreach ($replacements as $replacement) {
        $target = $replacement[0];
        $text = $replacement[1];
        while ($target->firstChild) {
          $target->removeChild($target->firstChild);
        }
        if ($text !== '') {
          $target->appendChild($modsDOMDoc->createTextNode($text));
        }
      }
    }

    // write the new updated info back into the datastream
    $modsDS->setContentFromString($modsDOMDoc->saveXML($modsDOMDoc->documentElement));
    # ingest edited datastream into the repository
    // NOTE(review): MODS already exists on the object, so this call may be
    // redundant after setContentFromString() - confirm against Tuque.
    $object->ingestDatastream($modsDS);
    /*************MODS RECORD COMPLETE*****************/

    /******************DUBLIN CORE ********************/
    // update the DC based on the MODS record
    $document = new DOMDocument();
    if (!$document->loadXML($modsDS->content)) {
      drush_print("\n\n**********####### ERROR #######*********");
      drush_print("***Could not re-read MODS of $objectPID, DC not regenerated***\n\n");
      continue;
    }
    $transform = 'mods_to_dc.xsl';
    // the magic call
    xml_form_builder_update_dc_datastream($object, $transform, $document);
    /*************DUBLIN CORE COMPLETE*****************/
  }
  catch (Exception $e) {
    drush_print("\n\n**********####### ERROR #######*********");
    drush_print("***Could not update object $objectPID: " . $e->getMessage() . "***\n\n");
    continue;
  }
}

drush_print("Main processing loop complete");
echo "\n\nAll operations complete\n";