Proper logging, autocompletion setup, QLeverfile for Wikidata
Hannah Bast committed Sep 14, 2023
1 parent 19a3070 commit b3b0b93
Showing 4 changed files with 193 additions and 96 deletions.
25 changes: 12 additions & 13 deletions Qleverfiles/Qleverfile.dblp
@@ -8,7 +8,7 @@
# (WITH_TEXT_INDEX = false), the index build takes only ~10 minutes.

[DEFAULT]
-NAME = dblp
+NAME = dblp

[data]
GET_DATA_URL = https://dblp.org/rdf/${index:FILE_NAMES}
@@ -17,21 +17,20 @@ INDEX_DESCRIPTION = DBLP computer science bibliography, data from ${GET_DATA_URL
TEXT_DESCRIPTION = All literals, search with ?text ql:contains-entity ?literal . ?text ql:contains-word "..." (where ... are keywords, which may end with a *)

[index]
-FILE_NAMES = dblp.ttl.gz
-CAT_FILES = zcat ${FILE_NAMES}
-SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 100000 }
+FILE_NAMES = dblp.ttl.gz
+CAT_FILES = zcat ${FILE_NAMES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 100000 }

[server]
-PORT = 7015
-ACCESS_TOKEN = ${NAME}_7643543846
-MEMORY_FOR_QUERIES_GB = 30
-CACHE_MAX_SIZE_GB = 5
+PORT = 7015
+ACCESS_TOKEN = ${NAME}_7643543846
+MEMORY_FOR_QUERIES_GB = 30
+CACHE_MAX_SIZE_GB = 5

[docker]
-USE_DOCKER = false
-IMAGE = adfreiburg/qlever
+USE_DOCKER = false
+IMAGE = adfreiburg/qlever

[ui]
-QLEVERUI_PORT = 7000
-QLEVERUI_DIR = qlever-ui
-QLEVERUI_CONFIG = dblp
+PORT = 7000
+CONFIG = dblp
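
The rewritten Qleverfile above is a plain INI file, and its ${NAME} and ${index:FILE_NAMES} references match the syntax of Python's configparser with ExtendedInterpolation. The following sketch is illustrative only (it is not part of qlever-control) and assumes the file is saved as Qleverfiles/Qleverfile.dblp; it shows how the cross-section references resolve.

# Minimal sketch: read the new-style Qleverfile with Python's configparser.
# Assumption: the ${...} references behave like ExtendedInterpolation;
# this reader is illustrative, not part of qlever-control.
from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read("Qleverfiles/Qleverfile.dblp")

# ${index:FILE_NAMES} in [data] resolves against the [index] section;
# ${NAME} in [server] resolves via the [DEFAULT] section.
print(config["data"]["GET_DATA_URL"])    # https://dblp.org/rdf/dblp.ttl.gz
print(config["server"]["ACCESS_TOKEN"])  # dblp_7643543846
print(config["ui"]["CONFIG"])            # dblp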
25 changes: 12 additions & 13 deletions Qleverfiles/Qleverfile.olympics
@@ -5,7 +5,7 @@
# qlever start # starts the server (instant)

[DEFAULT]
-NAME = olympics
+NAME = olympics

[data]
BASE_URL = https://github.com/wallscope/olympics-rdf
@@ -14,21 +14,20 @@ INDEX_DESCRIPTION = 120 Years of Olympics, data from ${BASE_URL}
TEXT_DESCRIPTION = All literals, search with FILTER CONTAINS(?var, "...")

[index]
-FILE_NAMES = ${NAME}.nt
-CAT_FILES = cat ${FILE_NAMES}
-SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 100000 }
+FILE_NAMES = ${NAME}.nt
+CAT_FILES = cat ${FILE_NAMES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 100000 }

[server]
-PORT = 7019
-ACCESS_TOKEN = ${NAME}_7643543846
-MEMORY_FOR_QUERIES_GB = 5
-CACHE_MAX_SIZE_GB = 2
+PORT = 7019
+ACCESS_TOKEN = ${NAME}_7643543846
+MEMORY_FOR_QUERIES_GB = 5
+CACHE_MAX_SIZE_GB = 2

[docker]
-USE_DOCKER = false
-IMAGE = adfreiburg/qlever
+USE_DOCKER = false
+IMAGE = adfreiburg/qlever

[ui]
-QLEVERUI_PORT = 7000
-QLEVERUI_DIR = qlever-ui
-QLEVERUI_CONFIG = olympics
+PORT = 7000
+CONFIG = olympics
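
As in the DBLP file, SETTINGS_JSON here is a JSON object embedded as a single INI value, so it can be checked with json.loads once the file has been read. The snippet below is again an illustrative sketch (not part of qlever-control) and assumes the same configparser-based reading as above, with the file saved as Qleverfiles/Qleverfile.olympics.

# Minimal sketch: validate the embedded SETTINGS_JSON of the olympics
# Qleverfile. Illustrative only; assumes configparser-compatible syntax.
import json
from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read("Qleverfiles/Qleverfile.olympics")

settings = json.loads(config["index"]["SETTINGS_JSON"])
assert settings["num-triples-per-batch"] == 100000
assert settings["ascii-prefixes-only"] is False

# FILE_NAMES expands via ${NAME} from [DEFAULT], so CAT_FILES resolves to:
print(config["index"]["CAT_FILES"])  # cat olympics.nt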
58 changes: 26 additions & 32 deletions Qleverfiles/Qleverfile.wikidata
@@ -1,38 +1,32 @@
# Qleverfile for Wikidata, use with https://github.com/ad-freiburg/qlever-control
#
-# qlever get-data # downloads two .bz2 files of total size 90 GB (as of 31.07.2022)
-# qlever index # takes ~14 hours and ~40 GB RAM (on an AMD Ryzen 9 5900X)
-# qlever start # starts the server (takes around 2 minutes)
+# qlever get-data downloads two .bz2 files of total size ~100 GB
+# qlever index takes ~7 hours and ~40 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start starts the server (takes around 30 seconds)

-# Indexer settings
-DB = wikidata-latest
-RDF_FILES = "latest-lexemes.ttl.bz2 latest-all.ttl.bz2"
-EXTRACT_PREFIXES = "for F in ${RDF_FILES}; do bzcat \$F | head -1000 | \grep ^@prefix; done | sort -u > wikidata-latest.prefix-definitions"
-CAT_FILES = "bzcat -f wikidata-latest.prefix-definitions ${RDF_FILES}"
-WITH_TEXT_INDEX = false
-STXXL_MEMORY_GB = 10
-SETTINGS_JSON = '{ "languages-internal": ["en"], "prefixes-external": [ "<http://www.wikidata.org/entity/statement", "<http://www.wikidata.org/value", "<http://www.wikidata.org/reference" ], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }'
-GET_DATA_CMD = "wget -nc https://dumps.wikimedia.org/wikidatawiki/entities/latest-all.ttl.bz2 https://dumps.wikimedia.org/wikidatawiki/entities/latest-lexemes.ttl.bz2"
-DATE_ALL = "$(ls -l --time-style=+%d.%m.%Y latest-all.ttl.bz2 2> /dev/null | cut -d' ' -f6)"
-DATE_LEXEMES = "$(ls -l --time-style=+%d.%m.%Y latest-lexemes.ttl.bz2 2> /dev/null | cut -d' ' -f6)"
-INDEX_DESCRIPTION = "Full Wikidata dump (latest-all.ttl.bz2 from ${DATE_ALL}, latest-lexemes.ttl.bz2 from ${DATE_LEXEMES})"
+[data]
+NAME = wikidata
+GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
+GET_DATA_CMD = curl -LO -C - ${GET_DATA_URL}/latest-all.ttl.bz2 ${GET_DATA_URL}/latest-lexemes.ttl.bz2
+INDEX_DESCRIPTION = "Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2)"

-# Server settings
-HOSTNAME = $(hostname -f)
-SERVER_PORT = 7001
-ACCESS_TOKEN = ${DB}_%RANDOM%
-MEMORY_FOR_QUERIES = 50
-CACHE_MAX_SIZE_GB = 30
-CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 5
-CACHE_MAX_NUM_ENTRIES = 100
+[index]
+FILE_NAMES = latest-lexemes.ttl.bz2 latest-all.ttl.bz2
+CAT_FILES = bzcat ${FILE_NAMES}
+SETTINGS_JSON = { "languages-internal": ["en"], "prefixes-external": [ "<http://www.wikidata.org/entity/statement", "<http://www.wikidata.org/value", "<http://www.wikidata.org/reference" ], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
+WITH_TEXT_INDEX = false
+STXXL_MEMORY_GB = 10

-# QLever binaries
-QLEVER_BIN_DIR = %QLEVER_BIN_DIR%
-USE_DOCKER = true
-QLEVER_DOCKER_IMAGE = adfreiburg/qlever
-QLEVER_DOCKER_CONTAINER = qlever.wikidata
+[server]
+PORT = 7001
+ACCESS_TOKEN = ${DB}_372483264
+MEMORY_FOR_QUERIES_GB = 50
+CACHE_MAX_SIZE_GB = 30

-# QLever UI
-QLEVERUI_PORT = 7000
-QLEVERUI_DIR = qlever-ui
-QLEVERUI_CONFIG = wikidata
+[docker]
+USE_DOCKER = true
+IMAGE = adfreiburg/qlever
+
+[ui]
+PORT = 7000
+CONFIG = wikidata
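
The new [data] and [index] sections describe download and decompression as shell one-liners (GET_DATA_CMD and CAT_FILES). A driver script could expand them and hand them to the shell roughly as sketched below; this runner is illustrative only (it is not the qlever script), it assumes the configparser-style interpolation shown earlier, and GET_DATA_CMD is only printed rather than executed because it would download roughly 100 GB.

# Minimal, illustrative sketch of expanding and running the shell
# one-liners from the Wikidata Qleverfile (not the actual qlever script).
import subprocess
from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read("Qleverfiles/Qleverfile.wikidata")

# Resolved download command (curl -LO -C - resumes partial downloads).
print(config["data"]["GET_DATA_CMD"])

# Peek at the first decompressed triple line; an indexer would read the
# full stream from the stdout of this pipeline.
cat_files = config["index"]["CAT_FILES"]  # bzcat latest-lexemes.ttl.bz2 latest-all.ttl.bz2
proc = subprocess.Popen(cat_files, shell=True, stdout=subprocess.PIPE)
print(proc.stdout.readline().decode(errors="replace").rstrip())
proc.kill()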