diff --git a/src/qlever/Qleverfiles/Qleverfile.wikidata b/src/qlever/Qleverfiles/Qleverfile.wikidata
index b7d3d0c8..e6ec6f6c 100644
--- a/src/qlever/Qleverfiles/Qleverfile.wikidata
+++ b/src/qlever/Qleverfiles/Qleverfile.wikidata
@@ -16,8 +16,7 @@ GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
 GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL}/latest-all.ttl.bz2 -O ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1 | tee wikidata.download-log.txt && curl -sL ${GET_DATA_URL}/dcatap.rdf | docker run -i --rm -v $$(pwd):/data stain/jena riot --syntax=RDF/XML --output=NT /dev/stdin > dcatap.nt
 DATE_WIKIDATA = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
 DATE_WIKIPEDIA = $$(date -r wikipedia-abstracts.nt +%d.%m.%Y || echo "NO_DATE")
-DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA}) + English Wikipeda abstracts (version ${DATE_WIKIPEDIA}, available via schema:description)
-TEXT_DESCRIPTION = All English and German literals + all sentences from the English Wikipedia (version ${DATE_WIKIPEDIA}), use with FILTER KEYWORDS(...)
+DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA})
 
 [index]
 INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2 dcatap.nt
@@ -26,7 +25,6 @@ MULTI_INPUT_JSON = [{ "cmd": "lbzcat -n 4 latest-all.ttl.bz2", "format": "ttl",
 { "cmd": "cat dcatap.nt", "format": "nt", "parallel": "false" }]
 SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
 STXXL_MEMORY = 10G
-TEXT_INDEX = from_text_records
 
 [server]
 PORT = 7001