Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
eloiferrer committed Sep 5, 2023
0 parents commit a07d433
Show file tree
Hide file tree
Showing 15 changed files with 277 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Local deployment configuration and working directories
docker-compose.yml
venv/
mediawiki/

# Python bytecode caches are build artefacts and must not be committed
__pycache__/
*.pyc
21 changes: 21 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
FROM python:3.8-slim

# Refresh the package index and install in a single layer so that a cached
# "update" layer can never be combined with a later "install" step. The
# downloaded index files are removed afterwards to keep the layer small.
RUN apt-get -y update \
    && apt-get -y install nginx python3-dev build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /usr/src/app

# Install the Python dependencies before copying the application code so
# that source-only changes reuse the cached dependency layer.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt --src /usr/local/src

COPY server ./server
COPY start.sh ./
COPY uwsgi.ini ./

# Replace the distribution's default nginx configuration
COPY nginx.conf /etc/nginx
RUN chmod +x ./start.sh
CMD ["./start.sh"]
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# importer-api
Importer API to enable the execution of importer scripts outside the production server.

## Documentation

* ```/items/<item_id>/mapping```
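Returns the MaRDI (local) ID and the Wikidata ID for a given item, as a JSON object with the keys `local_id` and `wikidata_id`. If no mapping is found, a JSON object with an `error` message is returned instead.
The item can be given by its local ID (e.g. `Q42`) or by its Wikidata ID. Wikidata IDs must be prefixed with ```wd:``` (e.g. `wd:Q42`).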

* ```/properties/<property_id>/mapping```
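Returns the MaRDI (local) ID and the Wikidata ID for a given property, as a JSON object with the keys `local_id` and `wikidata_id`. If no mapping is found, a JSON object with an `error` message is returned instead.
The property can be given by its local ID (e.g. `P31`) or by its Wikidata ID. Wikidata IDs must be prefixed with ```wdt:``` (e.g. `wdt:P31`).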

* ```/search/items/<label>```
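Returns a list of the IDs of all items whose English label is exactly the given label, as a JSON object of the form `{"QID": ["Q1", "Q2"]}`. The list is empty if no item matches.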

* ```/search/properties/<label>```
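Returns the ID of the property whose English label is exactly the given label, as a JSON object of the form `{"PID": "P1"}`. The value is an empty string if no property matches.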
37 changes: 37 additions & 0 deletions nginx.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# nginx front end for the importer API: accepts HTTP on port 5000 and
# forwards every request to the uWSGI application over a Unix socket.

# Same account that uwsgi.ini sets as uid/gid, so nginx can open the socket.
user www-data;
worker_processes auto;
pid /run/nginx.pid;

events {
worker_connections 1024;
use epoll;
multi_accept on;
}

http {
# Both logs are written to the process's standard output.
access_log /dev/stdout;
error_log /dev/stdout;

sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;

include /etc/nginx/mime.types;
default_type application/octet-stream;

index index.html index.htm;

server {
# Listen on port 5000 for both IPv4 and IPv6.
listen 5000 default_server;
listen [::]:5000 default_server;
server_name localhost;
root /var/www/html;

# Every path is handed to uWSGI; the socket path must match the
# "socket" setting in uwsgi.ini.
location / {
include uwsgi_params;
uwsgi_pass unix:/tmp/uwsgi.socket;
}
}
}
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
flask
mysql-connector-python
# The server code calls select() with positional columns, which needs the
# 1.4+ API.
sqlalchemy>=1.4
uwsgi
Empty file added server/__init__.py
Empty file.
Binary file added server/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file added server/__pycache__/hello.cpython-36.pyc
Binary file not shown.
Binary file added server/__pycache__/index.cpython-36.pyc
Binary file not shown.
17 changes: 17 additions & 0 deletions server/connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import os
import sqlalchemy as db

def create_engine():
    """
    Creates SQLalchemy engine

    The connection settings are read from the environment variables
    ``MYSQL_USER``, ``MYSQL_PASSWORD``, ``MYSQL_DATABASE`` and ``DB_HOST``.

    Returns:
        SQLalchemy engine

    Raises:
        KeyError: If one of the environment variables is not set.
    """
    # Imported locally so the fix does not depend on a module-level import.
    from urllib.parse import quote

    # Percent-encode the credentials: characters such as "@", ":" or "/"
    # would otherwise be read as URL delimiters and corrupt the parsed URL.
    db_user = quote(os.environ["MYSQL_USER"], safe="")
    db_pass = quote(os.environ["MYSQL_PASSWORD"], safe="")
    db_name = os.environ["MYSQL_DATABASE"]
    db_host = os.environ["DB_HOST"]
    return db.create_engine(
        f"mysql+mysqlconnector://{db_user}:{db_pass}@{db_host}/{db_name}"
    )
31 changes: 31 additions & 0 deletions server/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from flask import Flask, request, jsonify

from .mapping import get_mapping
from .search import search_items, search_properties

app = Flask(__name__)

@app.route('/items/<item_id>/mapping')
def get_item_mapping(item_id):
    """Return the local/Wikidata ID mapping for an item as JSON.

    Args:
        item_id: Item identifier taken from the URL path.

    Returns:
        The mapping as a JSON response, or a JSON error body with HTTP
        status 404 when no mapping was found.
    """
    mapping = get_mapping(item_id)
    if mapping:
        return jsonify(mapping)
    # Report the miss through the status code as well as the body, so
    # clients that only inspect the HTTP status do not treat it as success.
    return jsonify({'error': 'Item not found'}), 404

@app.route('/properties/<property_id>/mapping')
def get_property_mapping(property_id):
    """Return the local/Wikidata ID mapping for a property as JSON.

    Args:
        property_id: Property identifier taken from the URL path.

    Returns:
        The mapping as a JSON response, or a JSON error body with HTTP
        status 404 when no mapping was found.
    """
    mapping = get_mapping(property_id)
    if mapping:
        return jsonify(mapping)
    # Report the miss through the status code as well as the body, so
    # clients that only inspect the HTTP status do not treat it as success.
    return jsonify({'error': 'Property not found'}), 404

@app.route('/search/items/<label>')
def get_items(label):
    """Serve the result of ``search_items`` for the given label as JSON."""
    return jsonify(search_items(label))

@app.route('/search/properties/<label>')
def get_properties(label):
    """Serve the result of ``search_properties`` for the given label as JSON."""
    return jsonify(search_properties(label))

52 changes: 52 additions & 0 deletions server/mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import re
import sqlalchemy as db

from .connection import create_engine

def get_mapping(entity_id):
    """Look up the local/Wikidata ID pair for an entity.

    Args:
        entity_id: Either a local ID (``P`` or ``Q`` followed by digits) or
            a Wikidata ID prefixed with ``wd:`` or ``wdt:``.

    Returns:
        A dict with the keys ``local_id`` and ``wikidata_id``, or ``None``
        when the ID is malformed or no mapping is stored for it.
    """
    # fullmatch rejects IDs with trailing garbage (e.g. "Q12abc"), which an
    # unanchored match would silently truncate to "Q12".
    match_wikidata = re.fullmatch(r'wdt?:([PQ]\d+)', entity_id)
    if match_wikidata:
        wikidata_id = match_wikidata.group(1)
        local_id = get_local_id(wikidata_id)
        if local_id:
            return {'local_id': local_id,
                    'wikidata_id': wikidata_id}
        return None

    match_local = re.fullmatch(r'[PQ]\d+', entity_id)
    if match_local:
        local_id = match_local.group(0)
        wikidata_id = get_wikidata_id(local_id)
        if wikidata_id:
            return {'local_id': local_id,
                    'wikidata_id': wikidata_id}
    return None

def get_local_id(wikidata_id):
    """Return the local ID mapped to a Wikidata ID.

    Reads the ``local_id`` column of the ``wb_id_mapping`` table for the
    row whose ``wikidata_id`` equals the argument.

    Args:
        wikidata_id: Wikidata entity ID without prefix.

    Returns:
        The stored local ID, or ``None`` when no row matches.
    """
    engine = create_engine()
    try:
        metadata = db.MetaData()
        table = db.Table(
            "wb_id_mapping",
            metadata,
            autoload_with=engine
        )
        sql = db.select(table.columns['local_id']).where(
            table.columns.wikidata_id == wikidata_id,
        )
        with engine.connect() as connection:
            db_result = connection.execute(sql).fetchone()
            if db_result:
                return db_result[0]
        return None
    finally:
        # A fresh engine is built on every call; release its connection
        # pool explicitly instead of leaving it to garbage collection.
        engine.dispose()

def get_wikidata_id(local_id):
    """Return the Wikidata ID mapped to a local ID.

    Reads the ``wikidata_id`` column of the ``wb_id_mapping`` table for the
    row whose ``local_id`` equals the argument.

    Args:
        local_id: Local entity ID.

    Returns:
        The stored Wikidata ID, or ``None`` when no row matches.
    """
    engine = create_engine()
    try:
        metadata = db.MetaData()
        table = db.Table(
            "wb_id_mapping",
            metadata,
            autoload_with=engine
        )
        sql = db.select(table.columns['wikidata_id']).where(
            table.columns.local_id == local_id,
        )
        with engine.connect() as connection:
            db_result = connection.execute(sql).fetchone()
            if db_result:
                return db_result[0]
        return None
    finally:
        # A fresh engine is built on every call; release its connection
        # pool explicitly instead of leaving it to garbage collection.
        engine.dispose()
79 changes: 79 additions & 0 deletions server/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import sqlalchemy as db
import urllib.parse

from .connection import create_engine

def search_items(label):
    """Find the items whose English term text equals ``label``.

    Args:
        label: Label to search for; percent-escapes in it are decoded
            before the lookup.

    Returns:
        A dict ``{'QID': [...]}`` listing every matching item ID with a
        ``Q`` prefix. The list is empty when nothing matches.

    Raises:
        Exception: If reflecting the tables or running the query fails.
            The original error is attached as the cause.
    """
    # NOTE(review): Flask has normally already percent-decoded the path
    # segment, so this is a second decoding pass - presumably kept so that
    # callers can double-encode characters such as "/"; confirm.
    label = urllib.parse.unquote(label)
    engine = create_engine()
    try:
        with engine.connect() as connection:
            metadata = db.MetaData()
            try:
                wbt_item_terms = db.Table(
                    "wbt_item_terms", metadata, autoload_with=connection
                )
                wbt_term_in_lang = db.Table(
                    "wbt_term_in_lang", metadata, autoload_with=connection
                )
                wbt_text_in_lang = db.Table(
                    "wbt_text_in_lang", metadata, autoload_with=connection
                )
                wbt_text = db.Table(
                    "wbt_text", metadata, autoload_with=connection
                )
                # item term -> term in language -> text in language -> text
                query = (
                    db.select(wbt_item_terms.columns.wbit_item_id)
                    .join(
                        wbt_term_in_lang,
                        wbt_item_terms.columns.wbit_term_in_lang_id
                        == wbt_term_in_lang.columns.wbtl_id,
                    )
                    .join(
                        wbt_text_in_lang,
                        wbt_term_in_lang.columns.wbtl_text_in_lang_id
                        == wbt_text_in_lang.columns.wbxl_id,
                    )
                    .join(
                        wbt_text,
                        wbt_text.columns.wbx_id
                        == wbt_text_in_lang.columns.wbxl_text_id,
                    )
                    .where(db.and_(
                        wbt_text.columns.wbx_text == bytes(label, "utf-8"),
                        # presumably type 1 is the "label" term type -
                        # verify against the term type table
                        wbt_term_in_lang.columns.wbtl_type_id == 1,
                        wbt_text_in_lang.columns.wbxl_language
                        == bytes("en", "utf-8"),
                    ))
                )
                results = connection.execute(query).fetchall()
            except Exception as e:
                raise Exception(
                    "Error attempting to read mappings from database\n{}".format(e)
                ) from e
    finally:
        # A fresh engine is built on every call; release its connection
        # pool explicitly instead of leaving it to garbage collection.
        engine.dispose()

    return {'QID': [f"Q{row[0]}" for row in results]}

def search_properties(label):
    """Find the property whose English term text equals ``label``.

    Args:
        label: Label to search for; percent-escapes in it are decoded
            before the lookup.

    Returns:
        A dict ``{'PID': ...}`` holding the ID of the first matching
        property with a ``P`` prefix, or an empty string when nothing
        matches.

    Raises:
        Exception: If reflecting the tables or running the query fails.
            The original error is attached as the cause.
    """
    # NOTE(review): Flask has normally already percent-decoded the path
    # segment, so this is a second decoding pass - presumably kept so that
    # callers can double-encode characters such as "/"; confirm.
    label = urllib.parse.unquote(label)
    engine = create_engine()
    try:
        with engine.connect() as connection:
            metadata = db.MetaData()
            try:
                wbt_property_terms = db.Table(
                    "wbt_property_terms", metadata, autoload_with=connection
                )
                wbt_term_in_lang = db.Table(
                    "wbt_term_in_lang", metadata, autoload_with=connection
                )
                wbt_text_in_lang = db.Table(
                    "wbt_text_in_lang", metadata, autoload_with=connection
                )
                wbt_text = db.Table(
                    "wbt_text", metadata, autoload_with=connection
                )
                # property term -> term in language -> text in language -> text
                query = (
                    db.select(wbt_property_terms.columns.wbpt_property_id)
                    .join(
                        wbt_term_in_lang,
                        wbt_term_in_lang.columns.wbtl_id
                        == wbt_property_terms.columns.wbpt_term_in_lang_id,
                    )
                    .join(
                        wbt_text_in_lang,
                        wbt_term_in_lang.columns.wbtl_text_in_lang_id
                        == wbt_text_in_lang.columns.wbxl_id,
                    )
                    .join(
                        wbt_text,
                        wbt_text.columns.wbx_id
                        == wbt_text_in_lang.columns.wbxl_text_id,
                    )
                    .where(db.and_(
                        wbt_text.columns.wbx_text == bytes(label, "utf-8"),
                        # presumably type 1 is the "label" term type -
                        # verify against the term type table
                        wbt_term_in_lang.columns.wbtl_type_id == 1,
                        wbt_text_in_lang.columns.wbxl_language
                        == bytes("en", "utf-8"),
                    ))
                )
                results = connection.execute(query).fetchall()
            except Exception as e:
                raise Exception(
                    "Error attempting to read mappings from database\n{}".format(e)
                ) from e
    finally:
        # A fresh engine is built on every call; release its connection
        # pool explicitly instead of leaving it to garbage collection.
        engine.dispose()

    # Only the first match is reported.
    result = f"P{results[0][0]}" if results else ''
    return {'PID': result}
3 changes: 3 additions & 0 deletions start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Container entry point: start nginx as a background service, then run
# uWSGI in the foreground.
service nginx start
# exec replaces this shell with uWSGI, so signals sent to the container's
# main process (e.g. SIGTERM on stop) reach uWSGI directly instead of being
# held by an intermediate bash process.
exec uwsgi --ini uwsgi.ini
12 changes: 12 additions & 0 deletions uwsgi.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
; uWSGI configuration for the importer API.
[uwsgi]
; WSGI callable: the "app" object defined in server/index.py
module = server.index:app
; Run as the same account nginx.conf uses, so both sides can use the socket
uid = www-data
gid = www-data
; A master process supervising five workers
master = true
processes = 5

; Unix socket that nginx.conf forwards requests to (uwsgi_pass)
socket = /tmp/uwsgi.socket
; Read/write for owner and group, read-only for others
chmod-sock = 664
; Remove the socket file when uWSGI exits
vacuum = true

; Shut down on SIGTERM
die-on-term = true

0 comments on commit a07d433

Please sign in to comment.