Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dewey to additionalClassificationDdc #1327

Merged
merged 2 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class WorkComparator {
}
}

Classification.moveAdditionalDewey(result, docs)

return result
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package se.kb.libris.mergeworks.compare

import se.kb.libris.mergeworks.Doc

import static se.kb.libris.mergeworks.Util.asList

class Classification extends StuffSet {
private static def sabPrecedenceRules = loadSabPrecedenceRules()

Expand Down Expand Up @@ -34,9 +38,9 @@ class Classification extends StuffSet {
return result
}
} else if (isDewey(c1) && isDewey(c2)) {
def code = code1.startsWith(code2.replace("/", ""))
def code = deweyPrecedes(code1, code2)
? code1
: (code2.startsWith(code1.replace("/", "")) ? code2 : null)
: (deweyPrecedes(code2, code1) ? code2 : null)
if (code) {
Map result = [:]
result.putAll(c1)
Expand All @@ -49,18 +53,12 @@ class Classification extends StuffSet {
}
}

boolean isSab(Map c) {
c['inScheme'] && c['inScheme']['code'] =~ 'kssb'
}

String maxSabVersion(c1, c2) {
def v1 = c1['inScheme']['version'] ?: "-1"
def v2 = c2['inScheme']['version'] ?: "-1"
Integer.parseInt(v1) > Integer.parseInt(v2) ? v1 : v2
/**
 * Tells whether a classification entity is a Dewey (DDC) classification.
 *
 * @param c classification map
 * @return true when the map's '@type' entry equals 'ClassificationDdc'
 */
static boolean isDewey(Map c) {
    def type = c.get('@type')
    return type == 'ClassificationDdc'
}

boolean isDewey(Map c) {
c['@type'] == 'ClassificationDdc'
/**
 * Checks whether code {@code a} begins with code {@code b} once every "/"
 * has been removed from {@code b}. Slashes in {@code a} are left untouched.
 *
 * @param a the code that is tested for the prefix
 * @param b the code that, stripped of "/", is used as the prefix
 * @return true when {@code a} starts with the stripped {@code b}
 */
static boolean deweyPrecedes(String a, String b) {
    String prefix = b.replace("/", "")
    return a.startsWith(prefix)
}

String maxDeweyEdition(c1, c2) {
Expand All @@ -69,10 +67,59 @@ class Classification extends StuffSet {
deweyEdition(v1) > deweyEdition(v2) ? v1 : v2
}

int deweyEdition(String edition) {
/**
 * Extracts the numeric part of a Dewey edition string such as "23/swe".
 *
 * @param edition edition string; may be null or empty
 * @return the digits of {@code edition} parsed as an int, or 0 when
 *         {@code edition} is null, empty or contains no digits at all
 */
static int deweyEdition(String edition) {
    // Keep only the digits. The result can be empty (e.g. for "swe"), and
    // Integer.parseInt("") throws NumberFormatException, so treat that as edition 0.
    String digits = (edition ?: "").replaceAll("[^0-9]", "")
    return digits ? Integer.parseInt(digits) : 0
}

/**
 * When the merged work carries more than one Dewey classification, keeps only
 * the preferred one under 'classification' and moves the others to
 * 'additionalClassificationDdc'. The map is modified in place; nothing happens
 * when there are zero or one Dewey classifications on the merged work.
 *
 * @param mergedWork   the merged work data, updated in place
 * @param instanceDocs docs whose classification() values are used to decide
 *                     which Dewey classification is preferred
 */
static void moveAdditionalDewey(Map mergedWork, Collection<Doc> instanceDocs) {
    def mergedDdc = asList(mergedWork['classification']).findAll { Map cls -> isDewey(cls) }
    // Nothing to choose between unless several Dewey classifications are present.
    if (mergedDdc.size() <= 1) {
        return
    }

    // Every Dewey classification found on the individual docs.
    def instanceClassifications = instanceDocs.collect { doc -> doc.classification() }.flatten()
    def instanceDdc = instanceClassifications.findAll { Map cls -> isDewey(cls) }

    def keep = findPreferredDewey(mergedDdc, instanceDdc)
    def demoted = mergedDdc - keep

    mergedWork['classification'] = asList(mergedWork['classification']) - demoted
    def additional = asList(mergedWork['additionalClassificationDdc']) + demoted
    mergedWork['additionalClassificationDdc'] = additional.unique()
}

/**
 * Picks the Dewey classification that should stay in 'classification'.
 *
 * Each candidate in {@code deweyOnMerged} is scored by the number of entries in
 * {@code allDewey} whose code is compatible with it (one code starts with the
 * other, per deweyPrecedes). Entries lacking a code never count. Among the
 * candidates with the highest score, the one with the highest edition number
 * (per deweyEdition) wins; if that is also equal, an edition containing 'swe'
 * is sorted first.
 *
 * @param deweyOnMerged Dewey classifications present on the merged work
 * @param allDewey      Dewey classifications gathered from all instances
 * @return the preferred classification from {@code deweyOnMerged}
 */
static Map findPreferredDewey(List<Map> deweyOnMerged, List<Map> allDewey) {
    // Tally, per candidate, how many instance classifications carry a compatible code.
    def support = deweyOnMerged.collectEntries { Map candidate ->
        def matches = allDewey.count { Map other ->
            def otherCode = other['code']
            def candidateCode = candidate['code']
            otherCode && candidateCode &&
                    (deweyPrecedes(otherCode, candidateCode) || deweyPrecedes(candidateCode, otherCode))
        }
        [(candidate): matches]
    }

    def best = support.max { it.value }.value
    def topCandidates = support.findAll { it.value == best }.collect { it.key }

    // Tie-break: higher edition number first, then editions mentioning 'swe' first.
    topCandidates.sort { left, right ->
        def leftEdition = left['editionEnumeration']
        def rightEdition = right['editionEnumeration']
        def byEdition = deweyEdition(rightEdition) <=> deweyEdition(leftEdition)
        if (byEdition != 0) {
            return byEdition
        }
        return rightEdition?.contains('swe') <=> leftEdition?.contains('swe')
    }

    return topCandidates.first()
}

/**
 * Tells whether a classification belongs to a scheme whose code contains 'kssb'.
 *
 * @param c classification map
 * @return false when 'inScheme' is missing or empty; otherwise whether the
 *         scheme's 'code' contains a match for 'kssb'
 */
boolean isSab(Map c) {
    def scheme = c['inScheme']
    if (!scheme) {
        return false
    }
    def kssbMatch = scheme['code'] =~ 'kssb'
    return kssbMatch.find()
}

/**
 * Returns the numerically larger of the two classifications' scheme versions.
 * A missing version is treated as "-1"; when the versions are equal the second
 * one is returned.
 *
 * @param c1 first classification, read via ['inScheme']['version']
 * @param c2 second classification, read via ['inScheme']['version']
 * @return the version string with the larger integer value
 */
String maxSabVersion(c1, c2) {
    def first = c1['inScheme']['version'] ?: "-1"
    def second = c2['inScheme']['version'] ?: "-1"
    if (Integer.parseInt(first) > Integer.parseInt(second)) {
        return first
    }
    return second
}

static String normalizeSabCode(String sab) {
sab.replaceFirst(~/^h/, 'H').with {
it =~ /bf:|z/ ? it : it.replaceAll(~/\s+/, '')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,26 @@ class ClassificationSpec extends Specification {
'Hda.017=c' || 'Hda.018' || 'Hda.017=c'
'He' || 'Hc' || null
}

// Data-driven check of Classification.findPreferredDewey: the candidate backed by
// the most occurrences wins; ties are settled by edition number, then by 'swe'.
def "find which of multiple Dewey codes to keep in classification"() {
given:
// One candidate per entry in editionsOnMerged, with synthetic codes "x0", "x1", ...
def onMerged = (0..<editionsOnMerged.size()).collect { i ->
[
'code' : "x" + i,
'editionEnumeration': editionsOnMerged[i]
]
}
// Classifications as found on the instances; only the code matters for counting.
def all = allCodes.collect { ['code': it] }

expect:
Classification.findPreferredDewey(onMerged, all) == result

// Rows, in order:
//  1. x1 occurs more often (3 vs 2), so it wins although the editions are equal.
//  2. the null code is not counted; 2 vs 2 tie, and a missing edition counts as 0, so x1 (23) wins.
//  3. 2 vs 2 tie; edition 23 beats 22, so x0 wins.
//  4. 2 vs 2 tie with equal edition numbers; the edition containing 'swe' wins.
//  5. x0 occurs more often (3 vs 2), so it wins despite its lower edition.
where:
editionsOnMerged || allCodes || result
['23/swe', '23/swe'] || ['x0', 'x0', 'x1', 'x1', 'x1'] || ['code': 'x1', 'editionEnumeration': '23/swe']
[null, '23/swe'] || ['x0', 'x0', 'x1', 'x1', null] || ['code': 'x1', 'editionEnumeration': '23/swe']
['23', '22/swe'] || ['x0', 'x0', 'x1', 'x1'] || ['code': 'x0', 'editionEnumeration': '23']
['23/swe', '23'] || ['x0', 'x0', 'x1', 'x1'] || ['code': 'x0', 'editionEnumeration': '23/swe']
['22', '23/swe'] || ['x0', 'x0', 'x0', 'x1', 'x1'] || ['code': 'x0', 'editionEnumeration': '22']
}
}