-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathead2002-plus-schematron.xsd
425 lines (391 loc) · 25.6 KB
/
ead2002-plus-schematron.xsd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns="http://purl.oclc.org/dsdl/schematron" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Import the full EAD 2002 XML Schema -->
<xs:import namespace="urn:isbn:1-931666-22-9" schemaLocation="http://www.loc.gov/ead/ead.xsd"/>
<!-- Add Schematron rules to impose further restrictions and warnings -->
<xs:element name="ead">
<xs:annotation>
<xs:appinfo>
<ns uri="urn:isbn:1-931666-22-9" prefix="ead"/>
<ns uri="/" prefix="local"/>
<!--
local:getUnitId builds the locator string that the rules append to their messages.
Parameter $elem: the element being reported on.
Returns: |collection unitid|component unitid|level|unittitle (parts that do not apply are left empty).
The locator describes the nearest enclosing component (c, c01 to c12) of $elem, or failing that
the enclosing archdesc, or failing that the ead root element.
-->
<xsl:function name="local:getUnitId" as="xs:string">
<xsl:param name="elem" as="element()"/>
<!-- string(...) gives '' when the collection-level unitid is absent; a trailing /string() step would give an empty sequence, which is a type error against as="xs:string" and would abort validation -->
<xsl:variable name="cmdid" as="xs:string" select="string(($elem/ancestor-or-self::ead:ead/ead:archdesc[1]/ead:did[1]/ead:unitid[1])[1])"/>
<!-- Nearest enclosing component, if any ([1] on a reverse axis selects the closest ancestor) -->
<xsl:variable name="component" as="element()?" select="$elem/ancestor-or-self::*[self::ead:c or self::ead:c01 or self::ead:c02 or self::ead:c03 or self::ead:c04 or self::ead:c05 or self::ead:c06 or self::ead:c07 or self::ead:c08 or self::ead:c09 or self::ead:c10 or self::ead:c11 or self::ead:c12][1]"/>
<!-- First available of: component, archdesc, ead root -->
<xsl:variable name="partof" as="element()" select="($component, $elem/ancestor-or-self::ead:archdesc[1], $elem/ancestor-or-self::ead:ead)[1]"/>
<xsl:choose>
<!-- Outside archdesc (for example in eadheader): no component unitid or level, first unittitle found anywhere -->
<xsl:when test="$partof/self::ead:ead">
<xsl:value-of select="concat('|', $cmdid, '|||', ($partof//ead:unittitle)[1]/string())"/>
</xsl:when>
<!-- Collection level: the unitid is already carried by $cmdid, so the component unitid part stays empty -->
<xsl:when test="$partof/self::ead:archdesc">
<xsl:value-of select="concat('|', $cmdid, '||', $partof/@level, '|', ($partof/ead:did/ead:unittitle)[1]/string())"/>
</xsl:when>
<!-- Component with its own unitid -->
<xsl:when test="$partof/ead:did/ead:unitid">
<xsl:value-of select="concat('|', $cmdid, '|', ($partof/ead:did/ead:unitid)[1]/string(), '|', $partof/@level, '|', ($partof/ead:did/ead:unittitle)[1]/string())"/>
</xsl:when>
<!-- Component without a unitid -->
<xsl:otherwise>
<xsl:value-of select="concat('|', $cmdid, '||', $partof/@level, '|', ($partof/ead:did/ead:unittitle)[1]/string())"/>
</xsl:otherwise>
</xsl:choose>
</xsl:function>
<!--
local:normSpaceAndCase returns $str lower-cased with every run of whitespace removed
(whitespace is deleted, not collapsed to a single space).
-->
<xsl:function name="local:normSpaceAndCase" as="xs:string">
<xsl:param name="str" as="xs:string"/>
<xsl:value-of select="lower-case(replace($str, '\s+', ''))"/>
</xsl:function>
<pattern>
<!-- True when the collection-level dsc has at least one EAD child element -->
<let name="isfullcat" value="exists(/ead:ead/ead:archdesc/ead:dsc/ead:*)"/>
<!-- True when the archdesc itself is catalogued at item level -->
<let name="issingleitem" value="/ead:ead/ead:archdesc/@level = 'item'"/>
<!-- True when the creation statement in the header mentions Archivists' Toolkit -->
<let name="isatfile" value="contains(normalize-space(/ead:ead/ead:eadheader/ead:profiledesc/ead:creation/string()), 'Archivists'' Toolkit')"/>
<!-- Values of @level treated as series level and as file level by the rules below -->
<let name="serieslevels" value="('series','subseries','otherlevel')"/>
<let name="filelevels" value="('file')"/>
<!-- Accepted unitdate texts when no date is known -->
<let name="unknowndates" value="('n.d.', 'Date not recorded at time of cataloguing')"/>
<!-- extref targets that the extref rule does not report as optional -->
<let name="scurls" value="('http://solo.ouls.ox.ac.uk/permalink/f/n9grc9/oxfaleph010801538','http://solo.bodleian.ox.ac.uk/permalink/f/n9grc9/oxfaleph010801538','http://solo.bodleian.ox.ac.uk/permalink/f/n9grc9/oxfaleph010116452','http://solo.ouls.ox.ac.uk/permalink/f/n9grc9/oxfaleph010116452')"/>
<!-- Banned elements at all levels -->
<!-- Every element named in the context is reported as an error wherever it occurs -->
<rule role="error"
context="ead:abbr | ead:accruals | ead:address | ead:addressline | ead:appraisal | ead:arc | ead:archdescgrp | ead:archref | ead:bibseries | ead:change | ead:chronitem | ead:chronlist | ead:colspec | ead:creation | ead:daogrp | ead:daoloc | ead:defitem | ead:descgrp | ead:descrules | ead:dscgrp | ead:eadgrp | ead:emph | ead:entry | ead:event | ead:eventgrp | ead:extptr | ead:extptrloc | ead:extrefloc | ead:fileplan | ead:frontmatter | ead:function | ead:genreform | ead:head01 | ead:head02 | ead:imprint | ead:label | ead:lb | ead:legalstatus | ead:linkgrp | ead:listhead | ead:materialspec | ead:name | ead:namegrp | ead:note | ead:notestmt | ead:num | ead:occupation | ead:origination | ead:phystech | ead:processinfo | ead:ptr | ead:ptrgrp | ead:ptrloc | ead:publisher | ead:ref | ead:resource | ead:row | ead:runner | ead:seriesstmt | ead:subarea | ead:subtitle | ead:table | ead:tbody | ead:tgroup | ead:thead | ead:titlepage">
<!-- Unconditional: matching the context is itself the failure -->
<report test="true()">
Banned element: <name/>
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Mandatory elements at collection level -->
<!-- Each assertion requires one child element of the top-level archdesc -->
<rule role="error" context="/ead:ead/ead:archdesc">
<assert test="ead:acqinfo">
Missing element: acqinfo
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:bioghist">
Missing element: bioghist
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:controlaccess">
Missing element: controlaccess
<value-of select="local:getUnitId(.)"/>
</assert>
<!-- The required odd heading depends on the catalogue type. NOTE(review): when neither $issingleitem nor $isfullcat is true no odd can satisfy this assertion, so it always fails; confirm that is intended -->
<assert test="ead:odd[for $heading in normalize-space(string(ead:head[1])) return ($issingleitem and $heading = 'Shelfmark:') or ($isfullcat and $heading = 'Full range of shelfmarks:')]">
Missing element: odd (with a heading of "Shelfmark:" for single items or "Full range of shelfmarks:" for full catalogues)
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:prefercite">
Missing element: prefercite
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:scopecontent">
Missing element: scopecontent
<value-of select="local:getUnitId(.)"/>
</assert>
</rule>
<!-- More mandatory elements at collection level -->
<!-- Required content of the did that belongs to the top-level archdesc -->
<rule role="error" context="/ead:ead/ead:archdesc/ead:did">
<assert test="ead:abstract">
Missing element: abstract
<value-of select="local:getUnitId(.)"/>
</assert>
<!-- extent is looked for inside physdesc only -->
<assert test="ead:physdesc/ead:extent">
Missing element: extent
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:langmaterial">
Missing element: langmaterial
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:unitid">
Missing element: unitid
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:unitdate">
Missing element: unitdate
<value-of select="local:getUnitId(.)"/>
</assert>
</rule>
<!-- Banned elements at collection level -->
<!--
Reports the listed elements as errors when the nearest ancestor carrying @level is the archdesc.
Schematron applies only the first matching rule of a pattern to a node, so elements matched
here are not examined by later rules in this pattern that list the same names.
-->
<rule
context="ead:altformavail | ead:blockquote | ead:dimensions | ead:physloc"
role="error">
<!-- The ead: prefix is required: an unprefixed name test selects elements in no namespace and never matches EAD elements -->
<let name="iscollectionlevel" value="boolean(ancestor::ead:*[@level][1]/self::ead:archdesc)"/>
<!-- The $ is required: without it the test looks for a child element called iscollectionlevel -->
<report test="$iscollectionlevel">
Banned element: <name/>
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Collection-level controlaccess -->
<!-- Warns unless the controlaccess holds, at any depth, a subject and a personal or corporate name -->
<rule role="warn" context="/ead:ead/ead:archdesc/ead:controlaccess">
<assert test="descendant::ead:subject and (descendant::ead:persname or descendant::ead:corpname)">
Missing elements: controlaccess should contain at least one subject and at least one name
<value-of select="local:getUnitId(.)"/>
</assert>
</rule>
<!-- Collection-level physdesc -->
<!-- Checks how many extent elements sit in one collection-level physdesc (only physdesc elements that contain an extent are examined) -->
<rule role="warn" context="/ead:ead/ead:archdesc/ead:did/ead:physdesc[ead:extent]">
<!-- Single items: at most one extent -->
<report test="$issingleitem and count(ead:extent) gt 1">
Too many elements: multiple extents
<value-of select="local:getUnitId(.)"/>
</report>
<!-- Full catalogues: exactly two extents. NOTE(review): this also fires for more than two extents although the message says "Too few" -->
<report test="$isfullcat and count(ead:extent) ne 2">
Too few elements: there should be two extents (one for shelfmarks/boxes and one for linear metres)
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- A collection-level physfacet must not share its physdesc with an extent -->
<rule role="error" context="/ead:ead/ead:archdesc/ead:did/ead:physdesc/ead:physfacet">
<!-- Any extent child of the parent physdesc is necessarily a sibling of this physfacet -->
<report test="../ead:extent">
Incorrect siblings: physfacet must be in a separate physdesc from extent
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Banned elements at series level -->
<!-- Reports the listed elements when the nearest ancestor carrying @level has one of the $serieslevels values -->
<rule role="error"
context="ead:abstract | ead:accessrestrict | ead:bibliography | ead:blockquote | ead:date | ead:dimensions | ead:extent | ead:index | ead:otherfindaid | ead:physdesc | ead:physfacet | ead:physloc | ead:prefercite | ead:refloc">
<!-- The general comparison is true if the level equals any item of the sequence -->
<let name="isserieslevel" value="ancestor::ead:*[@level][1]/@level = $serieslevels"/>
<report test="$isserieslevel">
Banned element: <name/>
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Mandatory elements at series level -->
<!-- A series-level element with file-level children needs an odd headed "Full range of shelfmarks:" -->
<rule role="error" context="ead:*[@level = $serieslevels]">
<!-- Passes trivially when there are no file-level child components -->
<assert test="empty(ead:*[@level = $filelevels]) or ead:odd/ead:head[normalize-space(string()) = 'Full range of shelfmarks:']">
Missing element: odd (with a heading of "Full range of shelfmarks:" in a series/sub-series/otherlevel which contains child file levels)
<value-of select="local:getUnitId(.)"/>
</assert>
</rule>
<!-- More mandatory elements at series level -->
<!-- Required content of the did of any series-level element -->
<rule role="error" context="ead:*[@level = $serieslevels]/ead:did">
<assert test="ead:unittitle">
Missing element: unittitle
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:unitdate">
Missing element: unitdate
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:langmaterial">
Missing element: langmaterial
<value-of select="local:getUnitId(.)"/>
</assert>
</rule>
<!-- Banned elements at file level -->
<!--
Reports the listed elements when the nearest ancestor carrying @level has one of the $filelevels values.
NOTE(review): every element name in this context is also listed in the context of the series-level
banned rule earlier in this pattern. Schematron applies only the first matching rule of a pattern
to a node, so confirm whether this rule can ever fire. Note too that extent is required at file level
by the file-level did rule.
-->
<rule role="error" context="ead:abstract | ead:extent | ead:index | ead:otherfindaid | ead:prefercite">
<let name="isfilelevel" value="ancestor::ead:*[@level][1]/@level = $filelevels"/>
<report test="$isfilelevel">
Banned element: <name/>
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Mandatory elements at file level -->
<!-- Required content of the did of any file-level element -->
<rule role="error" context="ead:*[@level = $filelevels]/ead:did">
<!-- extent is looked for inside physdesc only -->
<assert test="ead:physdesc/ead:extent">
Missing element: extent
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:langmaterial">
Missing element: langmaterial
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:unitid">
Missing element: unitid
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:unitdate">
Missing element: unitdate
<value-of select="local:getUnitId(.)"/>
</assert>
<assert test="ead:unittitle">
Missing element: unittitle
<value-of select="local:getUnitId(.)"/>
</assert>
</rule>
<!-- Odds at file level -->
<!-- Warns when an odd directly under a file-level element (other than archdesc) lacks a head reading "Former reference:" -->
<rule role="warn" context="ead:*[not(self::ead:archdesc) and @level = $filelevels]/ead:odd">
<assert test="ead:head[normalize-space(string()) = 'Former reference:']">
Wrong heading: odd elements at file level should have a heading of "Former reference:"
<value-of select="local:getUnitId(.)"/>
</assert>
</rule>
<!-- Language of materials in files exported from Archivists' Toolkit -->
<!-- Applies only when $isatfile is true; the checks look at all langmaterial siblings within the same did -->
<rule context="ead:langmaterial[$isatfile]">
<!-- Collection level here means the did is a direct child of archdesc -->
<let name="iscollectionlevel" value="exists(parent::ead:did/parent::ead:archdesc)"/>
<let name="alllangmaterial" value="parent::ead:did/ead:langmaterial"/>
<!-- Collection level: need more than one langmaterial, at least one with a langcode and at least one non-empty one without -->
<report role="error"
test="$iscollectionlevel and not(count($alllangmaterial) gt 1 and $alllangmaterial[ead:language/@langcode] and $alllangmaterial[not(ead:language/@langcode) and string-length(normalize-space(string(.))) gt 0])">
Languages: Need both human and machine readable langmaterials at collection level
<value-of select="local:getUnitId(.)"/>
</report>
<!-- Below collection level: at least one langmaterial in the did must carry a langcode -->
<report role="error"
test="not($iscollectionlevel) and not($alllangmaterial[ead:language/@langcode])">
Languages: A machine readable langmaterial is needed below collection level
<value-of select="local:getUnitId(.)"/>
</report>
<!-- Below collection level: this particular langmaterial has no langcode -->
<report role="info"
test="not($iscollectionlevel) and not(ead:language/@langcode)">
Languages: Additional langmaterial below collection level which does not contain langcode(s)
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Language of materials in files not exported from Archivists' Toolkit (summary catalogues, exports from ArchivesSpace) -->
<!-- Each langmaterial must contain language children, and every one of them needs both a langcode and some text -->
<rule role="error" context="ead:langmaterial[not($isatfile)]">
<report test="empty(ead:language) or not(every $l in ead:language satisfies $l/@langcode and string-length(normalize-space($l)) gt 0)">
Languages: There must be both language codes and human-readable text in one langmaterial element
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Unitdate attributes and values -->
<!-- Checks the attributes and the text of every unitdate; series level is decided by the nearest ancestor carrying @level -->
<rule
context="ead:unitdate">
<let name="isserieslevel" value="boolean(ancestor::ead:*[@level][1]/@level = $serieslevels)"/>
<let name="datetext" value="normalize-space(string(.))"/>
<let name="containsnums" value="boolean(matches($datetext, '\d'))"/>
<!-- Outside series level, a date containing digits needs @normal -->
<report
test="not($isserieslevel) and not(@normal) and $containsnums"
role="error">
Missing attribute: unitdates must have normal attributes
<value-of select="local:getUnitId(.)"/>
</report>
<!-- At series level @normal and @type are not allowed -->
<report
test="$isserieslevel and (@normal or @type)"
role="error">
Banned attribute: unitdates must not have attributes at series level
<value-of select="local:getUnitId(.)"/>
</report>
<!-- Text without digits must be one of the $unknowndates values. The separator's double quotes are written as &quot; because a raw double quote would end the select attribute -->
<report
test="not($containsnums) and not($datetext = $unknowndates)"
role="warn">
Wrong text: When the date is unknown the text should be "<value-of select="string-join($unknowndates, '&quot; or &quot;')"/>"
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Nested lists in scopecontent at any level -->
<!-- Warns when this scopecontent contains a list that itself contains another list -->
<rule
context="ead:scopecontent"
role="warn">
<!-- The leading dot keeps the search inside this scopecontent; a bare // starts at the document root and would flag every scopecontent if a nested list exists anywhere in the file -->
<report
test=".//ead:list[.//ead:list]">
Too much structure: nested list in scopecontent
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Extrefs at any level -->
<!-- Three independent checks on every extref: a link attribute is present, its prefix is sensible, and whether the target is one of the $scurls -->
<rule context="ead:extref">
<!-- The href is matched by namespace URI, so any prefix bound to the XLink namespace is accepted -->
<assert role="error"
test="@*[local-name() = 'href' and namespace-uri() = 'http://www.w3.org/1999/xlink']">
Missing attribute: extref needs xlink:href
<value-of select="local:getUnitId(.)"/>
</assert>
<!-- Flags attributes whose qualified name starts with ns2 -->
<report role="warn"
test="@*[starts-with(name(), 'ns2')]">
Wrong namespace prefix: <name/> using ns2: instead of xlink: (also need to change declaration in the root element)
<value-of select="local:getUnitId(.)"/>
</report>
<!-- Any href (in any namespace) whose value is not one of the $scurls is reported for information -->
<report role="info"
test="@*[local-name() = 'href' and not(. = $scurls)]">
Optional element: extref
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Optional elements at collection level -->
<!-- Reports, for information, the listed elements when the nearest ancestor carrying @level is the archdesc -->
<rule
context="ead:accessrestrict | ead:altformavail | ead:arrangement | ead:bibliography | ead:custodhist | ead:editionstmt | ead:geogname | ead:originalsloc | ead:otherfindaid | ead:relatedmaterial | ead:separatedmaterial | ead:sponsor | ead:userestrict"
role="info">
<!-- The ead: prefix is required: an unprefixed name test selects elements in no namespace, which made this variable always false -->
<let name="iscollectionlevel" value="boolean(ancestor::ead:*[@level][1]/self::ead:archdesc)"/>
<report
test="$iscollectionlevel">
Optional element: <name/>
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Optional elements at series level -->
<!-- Reports, for information, the listed elements when the nearest ancestor carrying @level has one of the $serieslevels values -->
<rule role="info"
context="ead:acqinfo | ead:altformavail | ead:arrangement | ead:bibliography | ead:bioghist | ead:corpname | ead:custodhist | ead:famname | ead:geogname | ead:list | ead:persname | ead:originalsloc | ead:relatedmaterial | ead:separatedmaterial | ead:scopecontent | ead:subject">
<let name="isserieslevel" value="ancestor::ead:*[@level][1]/@level = $serieslevels"/>
<report test="$isserieslevel">
Optional element: <name/>
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Optional elements at file level -->
<!-- Reports, for information, the listed elements when the nearest ancestor carrying @level has one of the $filelevels values -->
<rule role="info"
context="ead:accessrestrict | ead:acqinfo | ead:altformavail | ead:arrangement | ead:bibliography | ead:bibref | ead:bioghist | ead:corpname | ead:custodhist | ead:dimensions | ead:famname | ead:geogname | ead:list | ead:persname | ead:physloc | ead:odd | ead:originalsloc | ead:relatedmaterial | ead:scopecontent | ead:separatedmaterial | ead:subject | ead:userestrict">
<let name="isfilelevel" value="ancestor::ead:*[@level][1]/@level = $filelevels"/>
<report test="$isfilelevel">
Optional element: <name/>
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
<!-- Look for broken entity references in all text nodes -->
<!--
Warns when a text node contains the two characters ampersand and hash next to each other.
The ampersand in the patterns is written as &amp;amp; because a bare ampersand followed by a hash
starts a character reference and makes this file ill-formed.
-->
<rule context="text()" role="warn">
<report
test="matches(., '&amp;#')">
Probable broken entity references: <value-of select="count(tokenize(., '&amp;#')) - 1"/> instances found in this block of text
<value-of select="local:getUnitId(parent::*)"/>
</report>
</rule>
<!-- Look for containers/barcodes which can cause problems for the ArchivesSpace EAD importer -->
<!-- Checks the label attribute of every container; a missing label is treated as the empty string throughout -->
<rule context="ead:container">
<let name="label" value="string(@label)"/>
<let name="normlabel" value="local:normSpaceAndCase($label)"/>
<!-- A trailing bracketed part of the label that contains whitespace -->
<report
test="matches($label, '[\(\[].*?\s+.*?[\)\]]\s*$')"
role="warn">
Container label: Spaces inside the brackets where ArchivesSpace looks for barcodes
<value-of select="local:getUnitId(.)"/>
</report>
<!--
The counts differ when an earlier container has a label that matches only after lower-casing and
removing whitespace. string(@label) is used on both sides so that a container without a label
neither raises a type error in local:normSpaceAndCase (which requires exactly one string) nor is
counted on one side only.
-->
<report
test="count(preceding::ead:container[string(@label) = $label]) ne count(preceding::ead:container[local:normSpaceAndCase(string(@label)) = $normlabel])"
role="error">
Container label: Same as one earlier in the same EAD file but with upper/lower case differences or more/less spaces
<value-of select="local:getUnitId(.)"/>
</report>
</rule>
</pattern>
</xs:appinfo>
</xs:annotation>
</xs:element>
</xs:schema>