Skip to content

Commit

Permalink
beta.2
Browse files Browse the repository at this point in the history
  • Loading branch information
Bernhard Bermeitinger committed Mar 8, 2017
1 parent 7ec02e7 commit e57942e
Show file tree
Hide file tree
Showing 28 changed files with 88 additions and 92 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
examples/
.gitignore
README.md
13 changes: 10 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
FROM python:3.4.5
FROM python:3.5.2
MAINTAINER Open Semantics Group <[email protected]>

WORKDIR /root
ADD . ./
# update old version
RUN pip list --local --outdated --format=freeze | cut -d= -f 1 | xargs -n1 pip install -U

WORKDIR /app
ADD requirements.txt /app/

# Project requirements
RUN pip install -r requirements.txt
# .. plus some NLTK corpora
RUN python -m nltk.downloader names punkt

# Deployment requirements
RUN pip install 'uwsgi==2.0.14'

# finally add application
ADD src/ /app/

# Go!
CMD [ "uwsgi", "--ini", "uwsgi.ini" ]
25 changes: 16 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,57 +8,64 @@ Useful hints that can save you some time.

## Python Interpreter

__Python 3.4.x__. It will probably run on any Python 3.x in the development environment, but the deployment uses version __3.4.5__.
__Python 3.5.x__

Python 3.6 should also work but is not tested.

## Running locally

Install dependencies.

```
$ pip install -r requirements
$ pip install -r requirements.txt
$ pip install uwsgi
```

This will start the service locally bound to `0.0.0.0:5128`.

```
$ python service.py
$ cd src
$ uwsgi --ini uwsgi.ini
```

You must have a CoreNLP server running at `http://corenlp:9000`. If not,
change it in `Resolver.py` to your instance.

## Running within a Docker Container

Build the image.

```
$ docker build -t coref-resolver:dev .
$ docker build -t coreference:v1.1.0-beta.2 .
```

Run in background.

```
$ docker run -d --name coref-resolver -p 5128:5128 coref-resolver:dev
$ docker run -d --name coreference -p 5128:5128 coreference:v1.1.0-beta.2
```

Look at the container stdout.

```
$ docker logs -f coref-resolver
$ docker logs -f coreference
```


## Testing


```
curl -X POST \
-H "Content-Type: application/json" \
-d '{"text": "Bernhard is working on two projects. He is employed in Mario and PACE."}' "http://localhost:5128/resolve/text"
-H "Accept: application/json" \
-d '{"text": "Donald Trump is the president of USA. He is a business man."}' "http://localhost:5128/resolve/text"
```

Expected output:

```json
{
"text": "Donald Trump is the president of USA. He is a business man.",
"text": "Donald Trump is the president of USA. Donald Trump is a business man."
}

```
7 changes: 3 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
joblib==0.10.3
joblib==0.11
pycorenlp==0.3.0
nltk==3.2.1
flask==0.11.1
typing==3.5.2.2
nltk==3.2.2
Flask==0.12
3 changes: 3 additions & 0 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
BATCH_SIZE = 200


# This file is for mass extractions and not recommended for single use.


def run(input_file: TextIOWrapper, output_file: TextIOWrapper, jobs: int, max_articles: Optional[int]) -> None:
log.info("Import done, now starting work")

Expand Down
18 changes: 9 additions & 9 deletions src/Resolver.py → src/coreference/Resolver.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
# -*- coding: utf-8 -*-
import logging
from typing import List, Tuple, Dict, Union, Set

import os
import re
from collections import Counter

from nltk.tokenize import sent_tokenize, word_tokenize
from pycorenlp import StanfordCoreNLP
from typing import List, Tuple, Dict, Union, Set

from classifier import GenderClassifier
from constant_types import PronounType, QuantityType, GenderType, EntityType
from references import NameReference, NominalReference, PronominalReference
from references import ResolvedPassage
from references import Substitution
from references.TypeLookup import TypeLookup
from .classifier import GenderClassifier
from .constant_types import PronounType, QuantityType, GenderType, EntityType
from .references import NameReference, NominalReference, PronominalReference
from .references import ResolvedPassage
from .references import Substitution
from .references.TypeLookup import TypeLookup

logging.getLogger('requests').setLevel(logging.WARNING)

Expand All @@ -31,8 +31,8 @@ class Resolver(object):
)
)
_separator = '---- <> ----'
_nlp = StanfordCoreNLP('http://localhost:9000')
# _nlp = StanfordCoreNLP('http://localhost:9000')
_nlp = StanfordCoreNLP('http://corenlp:9000')

_gender_classifier = GenderClassifier()

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import random

import nltk
import random
from nltk.corpus import names

from constant_types import GenderType
from ..constant_types import GenderType


class GenderClassifier(object):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from constant_types import EntityType
from constant_types import GenderType
from constant_types import QuantityType
from ..constant_types import EntityType
from ..constant_types import GenderType
from ..constant_types import QuantityType


class NameReference(object):
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
# -*- coding: utf-8 -*-


# noinspection PyPep8Naming
from constant_types import EntityType
from constant_types import GenderType
from constant_types import PronounType
from constant_types import QuantityType
from ..constant_types import EntityType
from ..constant_types import GenderType
from ..constant_types import PronounType
from ..constant_types import QuantityType


class PronominalReference(object):
Expand Down
14 changes: 14 additions & 0 deletions src/coreference/references/ResolvedPassage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
from typing import Dict


class ResolvedPassage(object):
    """Value object pairing a resolved passage with its linked entities.

    Attributes are stored exactly as passed in; no validation or copying
    is performed.
    """

    def __init__(self, resolved_passage: int, linked_entities: Dict[str, str]):
        """Store the passage and the entity mapping on the instance.

        :param resolved_passage: the resolved passage value.
        :param linked_entities: string-to-string mapping of linked entities.
        """
        # NOTE(review): ``resolved_passage`` is annotated ``int`` although the
        # name suggests passage text -- confirm the intended type with callers.
        self.linked_entities = linked_entities
        self.resolved_passage = resolved_passage

    def __str__(self):
        """Return the passage followed by the entity mapping, space-separated."""
        return "{} {}".format(self.resolved_passage, self.linked_entities)

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return self.__str__()
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-


# noinspection PyPep8Naming
class Substitution(object):
def __init__(self, sentence_index: int, original: str, reference: str):
self.sentence_index = sentence_index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ class TypeLookup(object):
__separator = ' -----+++----- '

def __init__(self, filename: str):
self.__instanceTypes = {}
self.__instance_types = {}

with bz2.open(filename, 'rt') as f:
for line in f:
fields = line.split(self.__separator)
self.__instanceTypes[fields[0].replace('<', '').replace('>', '')] = fields[1].replace('\n', '')
self.__instance_types[fields[0].replace('<', '').replace('>', '')] = fields[1].replace('\n', '')

def type(self, uri: str) -> str:
return self.__instanceTypes[uri] if uri in self.__instanceTypes else ''
return self.__instance_types[uri] if uri in self.__instance_types else ''
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions src/tests/Tests.py → src/coreference/tests/Tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from Resolver import Resolver


# UNUSED, UNSUPPORTED, UNMAINTAINED

def resolve(filename, uri):
with open(filename, 'r') as f:
text = f.read()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
import logging
import os
import sys
import unittest

from nltk import sent_tokenize

logging.getLogger('requests').setLevel(logging.WARNING)

sys.path.append(
os.path.dirname(
os.path.dirname(
os.path.abspath(__file__)
)
)
)
from ..Resolver import Resolver

from Resolver import Resolver
logging.getLogger('requests').setLevel(logging.WARNING)


# noinspection PyProtectedMember
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,14 @@
import logging
import os
import sys
import unittest

import os
from nltk import sent_tokenize

logging.getLogger('requests').setLevel(logging.WARNING)
from ..Resolver import Resolver

sys.path.append(
os.path.dirname(
os.path.dirname(
os.path.abspath(__file__)
)
)
)

from Resolver import Resolver
logging.getLogger('requests').setLevel(logging.WARNING)


# noinspection PyProtectedMember
class TestResolverForBarackObama(unittest.TestCase):
def setUp(self):
self.maxDiff = None
Expand Down
15 changes: 0 additions & 15 deletions src/references/ResolvedPassage.py

This file was deleted.

16 changes: 8 additions & 8 deletions service.py → src/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@

from flask import Flask, jsonify, make_response, request

from Resolver import Resolver
from coreference.Resolver import Resolver

application = Flask(__name__)
app = Flask(__name__)


@application.route('/', methods=['GET'])
@app.route('/', methods=['GET'])
def hello():
return jsonify({'name': 'Text Coreference Resolver',
'server_time': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")})


@application.route('/resolve/wiki', methods=['POST'])
@app.route('/resolve/wiki', methods=['POST'])
def resolve_wiki():
data = request.get_json()
if data is None:
Expand All @@ -31,7 +31,7 @@ def resolve_wiki():
return jsonify({'text': substituted})


@application.route('/resolve/text', methods=['POST'])
@app.route('/resolve/text', methods=['POST'])
def resolve_text():
data = request.get_json()
if data is None:
Expand All @@ -47,15 +47,15 @@ def resolve_text():
return jsonify({'text': substituted})


@application.route('/resolve/link', methods=['POST'])
@app.route('/resolve/link', methods=['POST'])
def resolve_link():
return make_response(jsonify({'message': 'Not implemented.'}), 501)


@application.errorhandler(404)
@app.errorhandler(404)
def page_not_found(_):
return make_response(jsonify({'message': 'No interface defined for URL'}), 404)


if __name__ == "__main__":
application.run(port=5128, debug=False, use_reloader=False, host='0.0.0.0')
app.run(port=5128, debug=False, use_reloader=False, host='0.0.0.0')
6 changes: 3 additions & 3 deletions uwsgi.ini → src/uwsgi.ini
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
[uwsgi]
protocol = http
socket = 0.0.0.0:5128
mount = /corefapp=service.py
callable = application
mount = /app=service.py
callable = app
processes = 4
master = true
harakiri = 600
uid = root
gid = root
no-orphans = true
plugins = python3
enable-threads = "*"

0 comments on commit e57942e

Please sign in to comment.