From ce099d6e537cebb8e695a4f96ac9ce148016cb68 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 3 Jun 2019 10:39:54 +0200 Subject: [PATCH 1/4] Declare constant; remove temporary debug message See hbz/lobid-resources#1000. --- .../lobid/helper/CreateWikidataNwbibMaps.java | 24 ++++++++++--------- .../lobid/resources/ElasticsearchIndexer.java | 1 - 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/main/java/de/hbz/lobid/helper/CreateWikidataNwbibMaps.java b/src/main/java/de/hbz/lobid/helper/CreateWikidataNwbibMaps.java index 281b6671f3..efffd07a80 100644 --- a/src/main/java/de/hbz/lobid/helper/CreateWikidataNwbibMaps.java +++ b/src/main/java/de/hbz/lobid/helper/CreateWikidataNwbibMaps.java @@ -29,14 +29,14 @@ public class CreateWikidataNwbibMaps { LogManager.getLogger(CreateWikidataNwbibMaps.class); private static final String WARN = "will not renew the map but going with the old one"; - private static Model model = ModelFactory.createDefaultModel(); + private static final Model MODEL = ModelFactory.createDefaultModel(); private static final Property FOCUS = - model.createProperty("http://xmlns.com/foaf/0.1/focus"); + MODEL.createProperty("http://xmlns.com/foaf/0.1/focus"); private static final String SKOS = "http://www.w3.org/2004/02/skos/core#"; private static final Property PREFLABEL = - model.createProperty(SKOS + "prefLabel"); + MODEL.createProperty(SKOS + "prefLabel"); private static final Property NOTATION = - model.createProperty(SKOS + "notation"); + MODEL.createProperty(SKOS + "notation"); private static final File TEST_FN = new File("src/main/resources/nwbib-spatial.tsv"); @@ -48,16 +48,18 @@ public class CreateWikidataNwbibMaps { public static void main(String... args) { try { - model.read(new InputStreamReader(new URL( - "https://github.com/hbz/lobid-vocabs/raw/master/nwbib/nwbib-spatial.ttl") - .openConnection().getInputStream(), - StandardCharsets.UTF_8), null, "TTL"); + MODEL + .read(new InputStreamReader( + new URL( + "https://github.com/hbz/lobid-vocabs/raw/master/nwbib/nwbib-spatial.ttl") + .openConnection().getInputStream(), + StandardCharsets.UTF_8), null, "TTL"); } catch (IOException e) { - LOG.warn("Couldn't lookup nwbib-spatial.ttl," + WARN); + LOG.warn("Couldn't lookup nwbib-spatial.ttl," + WARN, e); return; } StringBuilder sb = new StringBuilder(); - ResIterator it = model.listSubjects(); + ResIterator it = MODEL.listSubjects(); while (it.hasNext()) { Resource res = it.next(); if (res.hasProperty(FOCUS)) @@ -74,7 +76,7 @@ public static void main(String... args) { StandardCharsets.UTF_8); LOG.info("Success: created 'nwbib-spatial.tsv'"); } catch (IOException e) { - LOG.warn("Couldn't write file." + WARN); + LOG.warn("Couldn't write file." + WARN, e); } } } diff --git a/src/main/java/org/lobid/resources/ElasticsearchIndexer.java b/src/main/java/org/lobid/resources/ElasticsearchIndexer.java index b278085885..20c2687109 100644 --- a/src/main/java/org/lobid/resources/ElasticsearchIndexer.java +++ b/src/main/java/org/lobid/resources/ElasticsearchIndexer.java @@ -134,7 +134,6 @@ public void onCloseStream() { updateAliases(); // feed the rest of the bulk if (bulkRequest.numberOfActions() != 0) { - System.out.println(bulkRequest.toString()); BulkResponse bulkResponse = bulkRequest.execute().actionGet(); if (bulkResponse.hasFailures()) { LOG.warn("Bulk insert failed: " + bulkResponse.buildFailureMessage()); From 7aad886562b283bd5ff9e36b31b28deff4b287a1 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Tue, 4 Jun 2019 13:46:50 +0200 Subject: [PATCH 2/4] Update Jackson dependency See https://nvd.nist.gov/vuln/detail/CVE-2019-12086 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1a1eef6085..bff60e6d24 100644 --- a/pom.xml +++ b/pom.xml @@ -141,12 +141,12 @@ com.fasterxml.jackson.core jackson-core - 2.9.8 + 2.9.9 com.fasterxml.jackson.core jackson-databind - 2.9.8 + 2.9.9 org.slf4j From 1951e2ff921f0df790411e01aded8e0d9cdf1d20 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 11 Jun 2019 11:40:25 +0200 Subject: [PATCH 3/4] Use char-filter to remove hyphens from isbn/issn See #201. --- src/main/resources/index-config.json | 15 +++++++++++++-- web/test/tests/IndexIntegrationTest.java | 4 +++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/main/resources/index-config.json b/src/main/resources/index-config.json index 2e6cca8bdf..cfcdef6c93 100644 --- a/src/main/resources/index-config.json +++ b/src/main/resources/index-config.json @@ -36,6 +36,10 @@ ], "tokenizer" : "letter", "type" : "custom" + }, + "hyphen_analyzer": { + "tokenizer": "standard", + "char_filter": "remove-hyphens" } }, "filter" : { @@ -48,6 +52,13 @@ "type" : "snowball", "language" : "German2" } + }, + "char_filter": { + "remove-hyphens": { + "type": "pattern_replace", + "pattern": "-", + "replacement": "" + } } } } @@ -662,7 +673,7 @@ } }, "issn" : { - "analyzer" : "id_analyzer", + "analyzer" : "hyphen_analyzer", "type" : "text" }, "otherTitleInformation" : { @@ -781,7 +792,7 @@ }, "isbn" : { "type" : "text", - "analyzer" : "id_analyzer" + "analyzer" : "hyphen_analyzer" }, "oclcNumber" : { "type" : "text" diff --git a/web/test/tests/IndexIntegrationTest.java b/web/test/tests/IndexIntegrationTest.java index 15c6406f3e..b2d2c078cd 100644 --- a/web/test/tests/IndexIntegrationTest.java +++ b/web/test/tests/IndexIntegrationTest.java @@ -79,7 +79,9 @@ public static Collection data() { { "publication.publishedBy:DÄG", /*->*/ 1 }, { "hasItem.id:\"http\\://lobid.org/items/TT003059252\\:DE-5-58\\:9%2F041#\\!\"", /*->*/ 1 }, { "hasItem.id:TT003059252\\:DE-5-58\\:9%2F041", /*->*/ 0 }, - { "coverage:99", /*->*/ 21} + { "coverage:99", /*->*/ 21}, + { "isbn:3454128013", /*->*/ 1}, + { "isbn:345-4128-013", /*->*/ 1} }); } // @formatter:on From 2bcf83506649eb128af58ca86c1fa3124cd84ef2 Mon Sep 17 00:00:00 2001 From: Adrian Date: Thu, 4 Jul 2019 16:03:12 +0200 Subject: [PATCH 4/4] Fix string2 WDmapping for "Lippischer Wald" --- src/main/resources/string2wikidata.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/string2wikidata.tsv b/src/main/resources/string2wikidata.tsv index 282a6e75fe..38e495e66f 100644 --- a/src/main/resources/string2wikidata.tsv +++ b/src/main/resources/string2wikidata.tsv @@ -21,7 +21,7 @@ Lipper Bergland | 12 Q875161 Wiehengebirge | 12 Q702826 Warburger Börde | 12 Q2548986 Osning | 12 Q30602340 -Lippischer Wald | 12 Q109773 +Lippischer Wald | 12 Q31312912 Oberwälder Land | 12 Q2011523 Sauerland | 14 Q56041996 Rothaargebirge | 14 Q4219