-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy patheditMODSTopicsSingleObject.php
175 lines (146 loc) · 5.35 KB
/
editMODSTopicsSingleObject.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/env drush
#<?php
/**
* This script is designed to transform the MODS records of a single object which contains:
*
* <subject>
* <topic>Education</topic>
* <topic>Health Education</topic>
* <topic>Other Data</topic>
* </subject>
*
* into a record that looks like this:
*
* <subject>
* <topic>Education</topic>
* </subject>
* <subject>
* <topic>Health Education</topic>
* </subject>
* <subject>
* <topic>Other Data</topic>
* </subject>
*
* This script also updates the DC record after the changes are made.
*
* @author Paul Church
*
* @date August 2014
*/
// The PID of the object to process is the first user-supplied argument.
$objectPID = drush_shift();
if (empty($objectPID)) {
    // No PID given: print the usage hint and stop before touching the repository.
    $usageLines = array(
        "***Error: please provide the object PID as the first argument",
        "Example: drush php-script editMODSTopicsSingleObject.php RULA:13",
    );
    foreach ($usageLines as $usageLine) {
        drush_print($usageLine);
    }
    return;
}
// Load every PHP file shipped in the Tuque library directory.
$tuqueGlob = libraries_get_path('tuque') . '/*.php';
foreach (glob($tuqueGlob) as $tuqueFile) {
    require_once $tuqueFile;
}

// Repository connection parameters.
// NOTE(review): the endpoint and credentials are hard-coded here; consider
// reading them from configuration before running against a shared server.
$url = 'localhost:8080/fedora';
$username = 'fedoraAdmin';
$password = 'fedoraAdmin';

// Build the Tuque connection -> API -> repository chain used below.
$connection = new RepositoryConnection($url, $username, $password);
$api = new FedoraApi($connection);
$repository = new FedoraRepository($api, new SimpleCache());

// PIDs that could not be processed; reported at the end of the script.
$skippedObjects = array();
// Try to fetch the object from the repository. This script processes a single
// object, so a failed lookup ends the run.
try {
    $object = $repository->getObject($objectPID);
} catch (Exception $e) {
    drush_print("\n\n**********####### ERROR #######*********");
    drush_print("***Could not get object $objectPID from repo***\n\n");
    $skippedObjects[] = $objectPID;
    // There is no enclosing loop here, so `continue` is a fatal error; bail
    // out with `return` exactly like the other error handlers in this script.
    return;
}

// Grab the MODS datastream.
$modsDS = $object['MODS'];
// NOTE(review): Tuque's array access presumably yields a falsy value when the
// datastream does not exist -- confirm against the Tuque version in use.
// Without this guard the later `$modsDS->content` access would be fatal.
if (! $modsDS) {
    drush_print("\n\n**********####### ERROR #######*********");
    drush_print("***Object $objectPID has no MODS datastream***\n\n");
    $skippedObjects[] = $objectPID;
    return;
}
/**
 * **************MODS RECORD*********************
 *
 * Split every <subject> holding two or more <topic> children into one
 * <subject> per <topic>, inserted immediately before the original <subject>.
 * Subjects with zero or one topic are already in the target shape and are left
 * untouched, so re-running the script does not trigger a needless re-ingest.
 *
 * Sets $updateThisRecord to TRUE when at least one subject was split.
 */
$modsDOMDoc = new DOMDocument();
$modsDOMDoc->preserveWhiteSpace = false;
$modsDOMDoc->formatOutput = true;

// Refuse to continue on empty or malformed XML rather than operating on an
// empty document.
$modsXML = (string) $modsDS->content;
if ($modsXML === '' || ! $modsDOMDoc->loadXML($modsXML)) {
    drush_print("\n\n**********####### ERROR #######*********");
    drush_print("***Could not parse the MODS datastream of $objectPID ****\n\n");
    $skippedObjects[] = $objectPID;
    return;
}

$modsXPath = new DOMXPath($modsDOMDoc);
$modsXPath->registerNamespace('mods', 'http://www.loc.gov/mods/v3');

// flag to indicate if datastream reingest and DC regen is needed
$updateThisRecord = FALSE;
// original <subject> elements that end up empty and must be removed
$domElemsToRemove = array();

// loop through all <subject> nodes
foreach ($modsXPath->query('//mods:subject') as $node) {
    // Copy the direct <topic> children into a plain array so that moving them
    // out of the subject below cannot disturb the iteration.
    $topicNodes = iterator_to_array($modsXPath->query('mods:topic', $node), false);
    if (count($topicNodes) < 2) {
        continue;
    }

    foreach ($topicNodes as $topicNode) {
        // Create the wrapper in the same namespace (and with the same prefix)
        // as the original subject; a bare createElement() would produce an
        // element outside the MODS namespace in prefixed documents.
        // NOTE(review): attributes on the original <subject> (e.g. authority)
        // are not copied to the new wrappers -- confirm this is intended.
        $newSubjectNode = $modsDOMDoc->createElementNS($node->namespaceURI, $node->nodeName);
        $node->parentNode->insertBefore($newSubjectNode, $node);
        // appendChild() moves the existing <topic> out of the old subject, so
        // its text needs no manual re-escaping.
        $newSubjectNode->appendChild($topicNode);
    }
    $updateThisRecord = TRUE;

    // Only drop the original subject when nothing but the topics lived in it;
    // any other child elements stay in place instead of being discarded.
    if ($modsXPath->query('*', $node)->length === 0) {
        // Each subject is queued exactly once, so no de-duplication is needed
        // (array_unique() cannot compare DOMElement objects).
        $domElemsToRemove[] = $node;
    }
}

// Remove the emptied subjects after the traversal has finished.
foreach ($domElemsToRemove as $toBeRemoved) {
    $toBeRemoved->parentNode->removeChild($toBeRemoved);
}
// Persist changes only when the MODS record was actually modified above.
if ($updateThisRecord) {
    /*
     * Step 1: write the edited MODS XML back into the datastream and hand the
     * datastream to the object.
     */
    try {
        $updatedMODS = $modsDOMDoc->saveXML($modsDOMDoc->documentElement);
        $modsDS->setContentFromString($updatedMODS);
        $object->ingestDatastream($modsDS);
    } catch (Exception $modsError) {
        drush_print("\n\n**********####### ERROR #######*********");
        drush_print("***Could not set $objectPID MODS datastream content or ingest into repo ****\n\n");
        $skippedObjects[] = $objectPID;
        return;
    }

    /*
     * Step 2: regenerate the Dublin Core datastream from the MODS content
     * using the mods_to_dc.xsl transform.
     */
    try {
        $dcSourceDoc = new DOMDocument();
        $dcSourceDoc->loadXML($modsDS->content);
        xml_form_builder_update_dc_datastream($object, 'mods_to_dc.xsl', $dcSourceDoc);
    } catch (Exception $dcError) {
        drush_print("\n\n**********####### ERROR #######*********");
        drush_print("***Could not update $objectPID DC record ****\n\n");
        $skippedObjects[] = $objectPID;
        return;
    }
}
// Final report: announce completion, then list each problematic PID once.
drush_print("Main processing loop complete");
if (count($skippedObjects) > 0) {
    $skippedObjects = array_unique($skippedObjects);
    drush_print("The script had problems with the following PID's");
    foreach ($skippedObjects as $problemPID) {
        drush_print($problemPID);
    }
}
echo "\n\nAll operations complete\n";