Skip to content

Commit

Permalink
Wikidata Qleverfile without text index (#93)
Browse files Browse the repository at this point in the history
For our own endpoint, we add text from the English Wikipedia. But since this text is not part of the Wikidata dataset, we remove it from the official Qleverfile for Wikidata.
  • Loading branch information
hannahbast authored Nov 26, 2024
1 parent c6f9643 commit 4fe2447
Showing 1 changed file with 1 addition and 3 deletions.
4 changes: 1 addition & 3 deletions src/qlever/Qleverfiles/Qleverfile.wikidata
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL}/latest-all.ttl.bz2 -O ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1 | tee wikidata.download-log.txt && curl -sL ${GET_DATA_URL}/dcatap.rdf | docker run -i --rm -v $$(pwd):/data stain/jena riot --syntax=RDF/XML --output=NT /dev/stdin > dcatap.nt
DATE_WIKIDATA = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
DATE_WIKIPEDIA = $$(date -r wikipedia-abstracts.nt +%d.%m.%Y || echo "NO_DATE")
DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA}) + English Wikipeda abstracts (version ${DATE_WIKIPEDIA}, available via schema:description)
TEXT_DESCRIPTION = All English and German literals + all sentences from the English Wikipedia (version ${DATE_WIKIPEDIA}), use with FILTER KEYWORDS(...)
DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA})

[index]
INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2 dcatap.nt
Expand All @@ -26,7 +25,6 @@ MULTI_INPUT_JSON = [{ "cmd": "lbzcat -n 4 latest-all.ttl.bz2", "format": "ttl",
{ "cmd": "cat dcatap.nt", "format": "nt", "parallel": "false" }]
SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
STXXL_MEMORY = 10G
TEXT_INDEX = from_text_records

[server]
PORT = 7001
Expand Down

0 comments on commit 4fe2447

Please sign in to comment.