Skip to content

Commit

Permalink
Dewey to additionalClassificationDdc (#1327)
Browse files Browse the repository at this point in the history
Keep only one Dewey code in classification, move rest to additionalClassificationDdc
  • Loading branch information
kwahlin authored Nov 20, 2023
1 parent 60404d1 commit a9c0daf
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class WorkComparator {
}
}

Classification.moveAdditionalDewey(result, docs)

return result
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package se.kb.libris.mergeworks.compare

import se.kb.libris.mergeworks.Doc

import static se.kb.libris.mergeworks.Util.asList

class Classification extends StuffSet {
private static def sabPrecedenceRules = loadSabPrecedenceRules()

Expand Down Expand Up @@ -34,9 +38,9 @@ class Classification extends StuffSet {
return result
}
} else if (isDewey(c1) && isDewey(c2)) {
def code = code1.startsWith(code2.replace("/", ""))
def code = deweyPrecedes(code1, code2)
? code1
: (code2.startsWith(code1.replace("/", "")) ? code2 : null)
: (deweyPrecedes(code2, code1) ? code2 : null)
if (code) {
Map result = [:]
result.putAll(c1)
Expand All @@ -49,18 +53,12 @@ class Classification extends StuffSet {
}
}

boolean isSab(Map c) {
c['inScheme'] && c['inScheme']['code'] =~ 'kssb'
}

String maxSabVersion(c1, c2) {
def v1 = c1['inScheme']['version'] ?: "-1"
def v2 = c2['inScheme']['version'] ?: "-1"
Integer.parseInt(v1) > Integer.parseInt(v2) ? v1 : v2
static boolean isDewey(Map c) {
c['@type'] == 'ClassificationDdc'
}

boolean isDewey(Map c) {
c['@type'] == 'ClassificationDdc'
static boolean deweyPrecedes(String a, String b) {
a.startsWith(b.replace("/", ""))
}

String maxDeweyEdition(c1, c2) {
Expand All @@ -69,10 +67,59 @@ class Classification extends StuffSet {
deweyEdition(v1) > deweyEdition(v2) ? v1 : v2
}

int deweyEdition(String edition) {
static int deweyEdition(String edition) {
Integer.parseInt((edition ?: "0").replaceAll("[^0-9]", ""))
}

static void moveAdditionalDewey(Map mergedWork, Collection<Doc> instanceDocs) {
def deweyOnMerged = asList(mergedWork['classification']).findAll { Map c -> isDewey(c) }
if (deweyOnMerged.size() > 1) {
def allDewey = instanceDocs.collect { it.classification() }
.flatten()
.findAll { Map c -> isDewey(c) }

def preferredDewey = findPreferredDewey(deweyOnMerged, allDewey)

def additionalDewey = deweyOnMerged - preferredDewey

mergedWork['classification'] = asList(mergedWork['classification']) - additionalDewey
mergedWork['additionalClassificationDdc'] = (asList(mergedWork['additionalClassificationDdc']) + additionalDewey).unique()
}
}

static Map findPreferredDewey(List<Map> deweyOnMerged, List<Map> allDewey) {
def occurrenceCount = deweyOnMerged.collectEntries { Map dom ->
def numOccurrences = allDewey.count { Map d ->
def code1 = d['code']
def code2 = dom['code']
code1 && code2 && (deweyPrecedes(code1, code2) || deweyPrecedes(code2, code1))
}
[dom, numOccurrences]
}

def maxOccurrences = occurrenceCount.max { it.value }.value

def preferred = occurrenceCount.findResults { k, v -> v == maxOccurrences ? k : null }
.sort { a, b ->
def aEdition = a['editionEnumeration']
def bEdition = b['editionEnumeration']
deweyEdition(bEdition) <=> deweyEdition(aEdition)
?: bEdition?.contains('swe') <=> aEdition?.contains('swe')
}.first()

return preferred
}

boolean isSab(Map c) {
c['inScheme'] && c['inScheme']['code'] =~ 'kssb'
}

String maxSabVersion(c1, c2) {
def v1 = c1['inScheme']['version'] ?: "-1"
def v2 = c2['inScheme']['version'] ?: "-1"
Integer.parseInt(v1) > Integer.parseInt(v2) ? v1 : v2
}

static String normalizeSabCode(String sab) {
sab.replaceFirst(~/^h/, 'H').with {
it =~ /bf:|z/ ? it : it.replaceAll(~/\s+/, '')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,26 @@ class ClassificationSpec extends Specification {
'Hda.017=c' || 'Hda.018' || 'Hda.017=c'
'He' || 'Hc' || null
}

def "find which of multiple Dewey codes to keep in classification"() {
given:
def onMerged = (0..<editionsOnMerged.size()).collect { i ->
[
'code' : "x" + i,
'editionEnumeration': editionsOnMerged[i]
]
}
def all = allCodes.collect { ['code': it] }

expect:
Classification.findPreferredDewey(onMerged, all) == result

where:
editionsOnMerged || allCodes || result
['23/swe', '23/swe'] || ['x0', 'x0', 'x1', 'x1', 'x1'] || ['code': 'x1', 'editionEnumeration': '23/swe']
[null, '23/swe'] || ['x0', 'x0', 'x1', 'x1', null] || ['code': 'x1', 'editionEnumeration': '23/swe']
['23', '22/swe'] || ['x0', 'x0', 'x1', 'x1'] || ['code': 'x0', 'editionEnumeration': '23']
['23/swe', '23'] || ['x0', 'x0', 'x1', 'x1'] || ['code': 'x0', 'editionEnumeration': '23/swe']
['22', '23/swe'] || ['x0', 'x0', 'x0', 'x1', 'x1'] || ['code': 'x0', 'editionEnumeration': '22']
}
}

0 comments on commit a9c0daf

Please sign in to comment.