-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit a07d433
Showing
15 changed files
with
277 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
docker-compose.yml | ||
venv/ | ||
mediawiki/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Image for the importer API: nginx in front of a uWSGI-served Flask app.
FROM python:3.8-slim

# Refresh the package index and install in the SAME layer: with a separate
# "apt-get update" layer, Docker can reuse a cached, stale index for a later
# install. The index is removed afterwards to keep the image small.
# NOTE(review): python3-dev and build-essential are presumably needed to
# compile uwsgi during "pip install" - confirm before trimming.
RUN apt-get -y update \
    && apt-get -y install nginx python3-dev build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /usr/src/app

# Install the Python dependencies before copying the application code so
# that source-only changes do not invalidate the cached pip layer.
COPY requirements.txt ./
RUN pip install -r requirements.txt --src /usr/local/src

COPY server ./server
COPY start.sh ./
COPY uwsgi.ini ./

COPY nginx.conf /etc/nginx
RUN chmod +x ./start.sh
CMD ["./start.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# importer-api | ||
Importer API to enable the execution of importer scripts outside the production server. | ||
|
||
## Documentation | ||
|
||
* ```/items/<item_id>/mapping``` | ||
Returns the Mardi ID and the Wikidata ID for a given item. | ||
Wikidata IDs must be prefixed with ```wd:```; an ID without a prefix is treated as a Mardi ID. | ||
|
||
* ```/properties/<property_id>/mapping``` | ||
Returns the Mardi ID and the Wikidata ID for a given property. | ||
Wikidata IDs must be prefixed with ```wdt:```; an ID without a prefix is treated as a Mardi ID. | ||
|
||
* ```/search/items/<label>``` | ||
Returns the list of item IDs whose English label exactly matches the given label, as ```{"QID": [...]}```. The list is empty when no item matches. | ||
|
||
* ```/search/properties/<label>``` | ||
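Returns the ID of the first property whose English label exactly matches the given label, as ```{"PID": "..."}```. The value is an empty string when no property matches. |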
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# nginx front end for the importer API: every request is passed to uWSGI.
user www-data;
pid /run/nginx.pid;
worker_processes auto;

events {
    use epoll;
    multi_accept on;
    worker_connections 1024;
}

http {
    # Both logs are written to stdout.
    access_log /dev/stdout;
    error_log /dev/stdout;

    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;

    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    index index.html index.htm;

    server {
        # Listen on port 5000 for both IPv4 and IPv6.
        listen 5000 default_server;
        listen [::]:5000 default_server;
        server_name localhost;
        root /var/www/html;

        # Forward all paths to the uWSGI unix socket (same path as the
        # "socket" option in uwsgi.ini).
        location / {
            include uwsgi_params;
            uwsgi_pass unix:/tmp/uwsgi.socket;
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
flask | ||
mysql-connector-python | ||
sqlalchemy | ||
uwsgi |
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import os | ||
import sqlalchemy as db | ||
|
||
def create_engine():
    """
    Creates SQLalchemy engine

    The connection settings are read from the MYSQL_USER, MYSQL_PASSWORD,
    MYSQL_DATABASE and DB_HOST environment variables.

    Returns:
        SQLalchemy engine
    Raises:
        KeyError: if one of the environment variables is not set.
    """
    from urllib.parse import quote

    # Percent-encode the credentials: a raw "@", ":", "/" or "%" in the user
    # name or password would otherwise be parsed as URL syntax and yield a
    # broken (or silently wrong) connection URL. safe="" also encodes "/".
    db_user = quote(os.environ["MYSQL_USER"], safe="")
    db_pass = quote(os.environ["MYSQL_PASSWORD"], safe="")
    db_name = os.environ["MYSQL_DATABASE"]
    db_host = os.environ["DB_HOST"]
    return db.create_engine(
        f"mysql+mysqlconnector://{db_user}:{db_pass}@{db_host}/{db_name}"
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from flask import Flask, request, jsonify | ||
|
||
from .mapping import get_mapping | ||
from .search import search_items, search_properties | ||
|
||
app = Flask(__name__) | ||
|
||
@app.route('/items/<item_id>/mapping')
def get_item_mapping(item_id):
    """Return the ID mapping for ``item_id`` as JSON.

    The lookup is delegated to ``get_mapping``. When it yields nothing, the
    JSON body is ``{'error': 'Item not found'}``; no explicit status code is
    set in either case.
    """
    found = get_mapping(item_id)
    payload = found if found else {'error': 'Item not found'}
    return jsonify(payload)
|
||
@app.route('/properties/<property_id>/mapping')
def get_property_mapping(property_id):
    """Return the ID mapping for ``property_id`` as JSON.

    The lookup is delegated to ``get_mapping``. When it yields nothing, the
    JSON body is ``{'error': 'Property not found'}``; no explicit status code
    is set in either case.
    """
    found = get_mapping(property_id)
    payload = found if found else {'error': 'Property not found'}
    return jsonify(payload)
|
||
@app.route('/search/items/<label>')
def get_items(label):
    """Return the JSON-encoded result of ``search_items`` for ``label``."""
    return jsonify(search_items(label))
|
||
@app.route('/search/properties/<label>')
def get_properties(label):
    """Return the JSON-encoded result of ``search_properties`` for ``label``."""
    return jsonify(search_properties(label))
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import re | ||
import sqlalchemy as db | ||
|
||
from .connection import create_engine | ||
|
||
def get_mapping(entity_id):
    """Resolve an entity ID to its local/Wikidata ID pair.

    Args:
        entity_id: Either a Wikidata ID prefixed with ``wd:`` or ``wdt:``
            (e.g. ``wd:Q42``), or a bare local ID (e.g. ``Q42``, ``P31``).

    Returns:
        dict | None: ``{'local_id': ..., 'wikidata_id': ...}`` when the
        counterpart ID is found in the mapping table; ``None`` when the ID
        is malformed or has no mapping.
    """
    # fullmatch (not match) so that trailing garbage such as "Q12abc" is
    # rejected instead of being silently truncated to a valid-looking ID.
    match_wikidata = re.fullmatch(r'wdt?:([PQ]\d+)', entity_id)
    if match_wikidata:
        wikidata_id = match_wikidata.group(1)
        local_id = get_local_id(wikidata_id)
        if local_id:
            return {'local_id': local_id,
                    'wikidata_id': wikidata_id}
        return None

    if re.fullmatch(r'[PQ]\d+', entity_id):
        local_id = entity_id
        wikidata_id = get_wikidata_id(local_id)
        if wikidata_id:
            return {'local_id': local_id,
                    'wikidata_id': wikidata_id}
    return None
|
||
def get_local_id(wikidata_id):
    """Look up the local ID mapped to a Wikidata ID.

    Reads the ``local_id`` column of the ``wb_id_mapping`` table for the row
    whose ``wikidata_id`` column equals ``wikidata_id``.

    Args:
        wikidata_id: Wikidata ID to look up.

    Returns:
        The ``local_id`` value of the first matching row, or ``None`` when
        no row matches.
    """
    engine = create_engine()
    try:
        metadata = db.MetaData()
        table = db.Table(
            "wb_id_mapping",
            metadata,
            autoload_with=engine
        )
        sql = db.select(table.columns['local_id']).where(
            table.columns.wikidata_id == wikidata_id,
        )
        with engine.connect() as connection:
            db_result = connection.execute(sql).fetchone()
    finally:
        # The engine is created for this call only; dispose of it so its
        # connection pool is closed now rather than whenever it is
        # garbage-collected.
        engine.dispose()
    if db_result:
        return db_result[0]
    return None
|
||
def get_wikidata_id(local_id):
    """Look up the Wikidata ID mapped to a local ID.

    Reads the ``wikidata_id`` column of the ``wb_id_mapping`` table for the
    row whose ``local_id`` column equals ``local_id``.

    Args:
        local_id: Local ID to look up.

    Returns:
        The ``wikidata_id`` value of the first matching row, or ``None``
        when no row matches.
    """
    engine = create_engine()
    try:
        metadata = db.MetaData()
        table = db.Table(
            "wb_id_mapping",
            metadata,
            autoload_with=engine
        )
        sql = db.select(table.columns['wikidata_id']).where(
            table.columns.local_id == local_id,
        )
        with engine.connect() as connection:
            db_result = connection.execute(sql).fetchone()
    finally:
        # The engine is created for this call only; dispose of it so its
        # connection pool is closed now rather than whenever it is
        # garbage-collected.
        engine.dispose()
    if db_result:
        return db_result[0]
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import sqlalchemy as db | ||
import urllib.parse | ||
|
||
from .connection import create_engine | ||
|
||
def search_items(label):
    """Find the items whose term text equals ``label``.

    The query joins ``wbt_item_terms`` -> ``wbt_term_in_lang`` ->
    ``wbt_text_in_lang`` -> ``wbt_text`` and keeps rows whose text equals the
    UTF-8 encoded label, whose term type ID is 1 and whose language is
    ``en``.

    Args:
        label: Text to search for. It is passed through
            ``urllib.parse.unquote`` first, so percent-escapes are decoded.

    Returns:
        dict: ``{'QID': [...]}`` with one ``Q<id>`` string per matching row;
        the list is empty when nothing matches.

    Raises:
        Exception: if reflecting the tables or running the query fails; the
            original error is attached as ``__cause__``.
    """
    label = urllib.parse.unquote(label)
    engine = create_engine()
    try:
        with engine.connect() as connection:
            metadata = db.MetaData()
            try:
                wbt_item_terms = db.Table(
                    "wbt_item_terms", metadata, autoload_with=connection
                )
                wbt_term_in_lang = db.Table(
                    "wbt_term_in_lang", metadata, autoload_with=connection
                )
                wbt_text_in_lang = db.Table(
                    "wbt_text_in_lang", metadata, autoload_with=connection
                )
                wbt_text = db.Table(
                    "wbt_text", metadata, autoload_with=connection
                )
                query = (
                    db.select(wbt_item_terms.columns.wbit_item_id)
                    .join(
                        wbt_term_in_lang,
                        wbt_item_terms.columns.wbit_term_in_lang_id
                        == wbt_term_in_lang.columns.wbtl_id,
                    )
                    .join(
                        wbt_text_in_lang,
                        wbt_term_in_lang.columns.wbtl_text_in_lang_id
                        == wbt_text_in_lang.columns.wbxl_id,
                    )
                    .join(
                        wbt_text,
                        wbt_text.columns.wbx_id
                        == wbt_text_in_lang.columns.wbxl_text_id,
                    )
                    .where(db.and_(
                        wbt_text.columns.wbx_text == bytes(label, "utf-8"),
                        wbt_term_in_lang.columns.wbtl_type_id == 1,
                        wbt_text_in_lang.columns.wbxl_language
                        == bytes("en", "utf-8"),
                    ))
                )
                results = connection.execute(query).fetchall()
            except Exception as e:
                raise Exception(
                    "Error attempting to read mappings from database\n{}".format(e)
                ) from e
    finally:
        # The engine is created for this call only; dispose of it so its
        # connection pool is closed now rather than whenever it is
        # garbage-collected.
        engine.dispose()

    return {'QID': [f"Q{row[0]}" for row in results]}
|
||
def search_properties(label):
    """Find the first property whose term text equals ``label``.

    The query joins ``wbt_property_terms`` -> ``wbt_term_in_lang`` ->
    ``wbt_text_in_lang`` -> ``wbt_text`` and keeps rows whose text equals the
    UTF-8 encoded label, whose term type ID is 1 and whose language is
    ``en``.

    Args:
        label: Text to search for. It is passed through
            ``urllib.parse.unquote`` first, so percent-escapes are decoded.

    Returns:
        dict: ``{'PID': 'P<id>'}`` built from the first matching row, or
        ``{'PID': ''}`` when nothing matches.

    Raises:
        Exception: if reflecting the tables or running the query fails; the
            original error is attached as ``__cause__``.
    """
    label = urllib.parse.unquote(label)
    engine = create_engine()
    try:
        with engine.connect() as connection:
            metadata = db.MetaData()
            try:
                wbt_property_terms = db.Table(
                    "wbt_property_terms", metadata, autoload_with=connection
                )
                wbt_term_in_lang = db.Table(
                    "wbt_term_in_lang", metadata, autoload_with=connection
                )
                wbt_text_in_lang = db.Table(
                    "wbt_text_in_lang", metadata, autoload_with=connection
                )
                wbt_text = db.Table(
                    "wbt_text", metadata, autoload_with=connection
                )
                query = (
                    db.select(wbt_property_terms.columns.wbpt_property_id)
                    .join(
                        wbt_term_in_lang,
                        wbt_term_in_lang.columns.wbtl_id
                        == wbt_property_terms.columns.wbpt_term_in_lang_id,
                    )
                    .join(
                        wbt_text_in_lang,
                        wbt_term_in_lang.columns.wbtl_text_in_lang_id
                        == wbt_text_in_lang.columns.wbxl_id,
                    )
                    .join(
                        wbt_text,
                        wbt_text.columns.wbx_id
                        == wbt_text_in_lang.columns.wbxl_text_id,
                    )
                    .where(db.and_(
                        wbt_text.columns.wbx_text == bytes(label, "utf-8"),
                        wbt_term_in_lang.columns.wbtl_type_id == 1,
                        wbt_text_in_lang.columns.wbxl_language
                        == bytes("en", "utf-8"),
                    ))
                )
                results = connection.execute(query).fetchall()
            except Exception as e:
                raise Exception(
                    "Error attempting to read mappings from database\n{}".format(e)
                ) from e
    finally:
        # The engine is created for this call only; dispose of it so its
        # connection pool is closed now rather than whenever it is
        # garbage-collected.
        engine.dispose()

    # Only the first matching row is reported; an empty string marks a miss.
    result = f"P{results[0][0]}" if results else ''
    return {'PID': result}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/usr/bin/env bash
# Container entry point: start nginx as a service, then run uWSGI in the
# foreground.
service nginx start

# exec replaces this shell with uWSGI so that signals sent to the container's
# main process (e.g. SIGTERM on stop) reach uWSGI directly; uwsgi.ini sets
# die-on-term, which only helps if uWSGI actually receives the signal.
exec uwsgi --ini uwsgi.ini
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[uwsgi]
; Application entry point: the "app" object of the server.index module.
module = server.index:app
; Run as www-data, the same account nginx.conf names in its "user" directive.
uid = www-data
gid = www-data
; One master process with 5 workers.
master = true
processes = 5

; Unix socket that nginx.conf targets with uwsgi_pass.
socket = /tmp/uwsgi.socket
chmod-sock = 664
vacuum = true

die-on-term = true