Refactor code to use ycsettings
skylander86 committed Sep 27, 2017
1 parent 3619a4f commit d877267
Showing 8 changed files with 57 additions and 150 deletions.
1 change: 1 addition & 0 deletions requirements.txt
@@ -8,3 +8,4 @@ tabulate
pyyaml

uriutils
ycsettings
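
ycsettings becomes a runtime dependency with this commit. As a minimal sketch of the pattern the rest of the diff adopts, using only calls that appear in the changes below (the sources here are illustrative stand-ins for each script's argparse namespace and settings file):

    from argparse import Namespace

    from ycsettings import Settings

    cli_args = Namespace(log_level='INFO', settings_uri=None)  # stands in for parser.parse_args()
    file_settings = {'n_jobs': 4}                               # stands in for a loaded settings file

    # The sources are merged into a single lookup object; several scripts below
    # also pass search_first=['env', 'env_settings_uri'] to consult the environment.
    settings = Settings(cli_args, file_settings)

    log_level = settings.get('log_level', default='DEBUG')
    n_jobs = settings.getnjobs('n_jobs', default=1)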
10 changes: 6 additions & 4 deletions ycml/featclass/base.py
@@ -7,12 +7,14 @@

from sklearn.base import BaseEstimator, ClassifierMixin

from ycsettings import Settings

from ..classifiers import load_classifier
from ..featurizers import load_featurizer

from uriutils import uri_open

from ..utils import get_settings, load_dictionary_from_file, get_class_from_module_path, chunked_iterator
from ..utils import load_dictionary_from_file, get_class_from_module_path, chunked_iterator

logger = logging.getLogger(__name__)

@@ -116,9 +118,9 @@ def load_featclass(*, settings={}, uri=None, check_environment=True):
if uri is not None:
settings_from_uri = load_dictionary_from_file(uri)

sources = ('env', settings_from_uri, settings) if check_environment else (settings_from_uri, settings)
featclass_type = get_settings(key='featclass_type', sources=sources, raise_on_missing=True)
featclass_parameters = get_settings(key='featclass_parameters', sources=sources, default={})
settings = Settings(settings_from_uri, settings, search_first=['env', 'env_settings_uri'] if check_environment else [])
featclass_type = settings.get('featclass_type', raise_exception=True)
featclass_parameters = settings.getdict('featclass_parameters', default={})

featclass_class = get_class_from_module_path(featclass_type)
featclass = featclass_class(**featclass_parameters)
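
A minimal sketch of calling the refactored load_featclass with an in-memory settings dictionary (the class path is illustrative; the keys are the ones read above):

    from ycml.featclass import load_featclass

    # Hypothetical settings: featclass_type must be an importable class path and
    # featclass_parameters is passed to that class's constructor, as shown above.
    settings = {
        'featclass_type': 'mypackage.featclass.MyFeatclass',  # illustrative
        'featclass_parameters': {},
    }

    # With check_environment=True (the default), the environment and any
    # env_settings_uri are searched before these dictionaries.
    featclass = load_featclass(settings=settings, check_environment=True)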
32 changes: 14 additions & 18 deletions ycml/http_daemon/__main__.py
@@ -15,8 +15,10 @@

from uriutils import URIFileType

from ycsettings import Settings

# We use ycml full paths here so it is easy to copy and paste this code.
from ycml.utils import get_settings, load_dictionary_from_file
from ycml.utils import load_dictionary_from_file
from ycml.featclass import load_featclass
from ycml.http_daemon.decorators import check_api_token

@@ -27,9 +29,9 @@
logger = logging.getLogger('ycml.http_daemon')


def create_app(A, file_settings):
log_level = get_settings(key='log_level', sources=('env', file_settings), default='INFO').upper()
log_format = get_settings(key='log_format', sources=(A, 'env', file_settings), default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
def create_app(A, settings):
log_level = settings.get('log_level', default='INFO').upper()
log_format = settings.get('log_format', default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
logging.basicConfig(format=log_format, level=logging.getLevelName(log_level))

config_dict = dict(
@@ -39,9 +41,9 @@ def create_app(A, file_settings):
)
config_dict['JSONIFY_PRETTYPRINT_REGULAR'] = config_dict['DEBUG']

api_token = get_settings(key='http_daemon_api_token', sources=('env', file_settings))
api_token = settings.get('http_daemon_api_token')
if api_token is None:
http_daemon_uri = get_settings(key='http_daemon_uri', sources=('env', file_settings), raise_on_missing=True)
http_daemon_uri = settings.get('http_daemon_uri', raise_exception=True)
api_token = os.path.basename(urlparse(http_daemon_uri).path)
#end if
config_dict['api_token'] = api_token
@@ -50,7 +52,7 @@ def create_app(A, file_settings):
app.config.update(config_dict)

with app.app_context():
current_app.config['featclass'] = load_featclass(settings=file_settings, uri=get_settings(key='featclass_uri', sources=('env', file_settings)))
current_app.config['featclass'] = load_featclass(settings=settings, uri=settings.get('featclass_uri'))
if tf: current_app.config['tf_graph'] = tf.get_default_graph()
#end with

@@ -126,10 +128,7 @@ def api():


def gunicorn_app(environ, start_response):
settings_uri = get_settings(key='settings_uri', sources=('env',), raise_on_missing=True)
file_settings = load_dictionary_from_file(settings_uri)

app = create_app({}, file_settings)
app = create_app({}, Settings())

return app(environ, start_response)
#end def
@@ -143,14 +142,11 @@ def gunicorn_app(environ, start_response):
parser.add_argument('-p', '--port', type=int, default=None, metavar='p', help='Port to listen on.', dest='http_daemon_port')
A = parser.parse_args()

if A.settings: settings_uri = A.settings
else: settings_uri = get_settings(key='settings_uri', sources=('env', A), raise_on_missing=True)

file_settings = load_dictionary_from_file(settings_uri)
settings = Settings(A)

http_daemon_port = get_settings(key='http_daemon_port', sources=('env', A, file_settings))
http_daemon_port = settings.get('http_daemon_port')
if http_daemon_port is None:
http_daemon_uri = get_settings(key='http_daemon_uri', sources=('env', file_settings), raise_on_missing=True)
http_daemon_uri = settings.get('http_daemon_uri', raise_exception=True)
http_daemon_port = urlparse(http_daemon_uri).port
#end if

@@ -159,6 +155,6 @@

http_daemon_port = int(http_daemon_port)

app = create_app(A, file_settings)
app = create_app(A, settings)
app.run(debug=A.debug, host='0.0.0.0', use_reloader=False, port=http_daemon_port)
#end if
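
The port resolution in __main__ now reduces to the sketch below (a paraphrase of the changed lines above; the example URI in the comment is illustrative):

    from urllib.parse import urlparse

    from ycsettings import Settings

    # No CLI namespace here, so values presumably come from the environment or an
    # env-specified settings file, which is what gunicorn_app() relies on above.
    settings = Settings()

    http_daemon_port = settings.get('http_daemon_port')
    if http_daemon_port is None:
        # Same fallback as the script: derive the port from http_daemon_uri,
        # e.g. 'http://0.0.0.0:8080/some-token' -> 8080.
        http_daemon_uri = settings.get('http_daemon_uri', raise_exception=True)
        http_daemon_port = urlparse(http_daemon_uri).port

    http_daemon_port = int(http_daemon_port)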
11 changes: 6 additions & 5 deletions ycml/scripts/evaluate.py
@@ -10,11 +10,12 @@

from uriutils import URIFileType, URIType

from ycsettings import Settings

from ..classifiers import load_classifier, get_thresholds_from_file
from ..featurizers import load_featurized
from ..utils import classification_report, find_best_thresholds, generate_pr_curves
from ..utils import get_settings
from ..utils import load_dictionary_from_file, save_dictionary_to_file
from ..utils import save_dictionary_to_file

__all__ = []

@@ -44,10 +45,10 @@ def main():

A = parser.parse_args()

file_settings = load_dictionary_from_file(A.settings) if A.settings else {}
settings = Settings(A)

log_level = get_settings(key='log_level', sources=(A, 'env', file_settings), default='DEBUG').upper()
log_format = get_settings(key='log_format', sources=(A, 'env', file_settings), default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
log_level = settings.get('log_level', default='DEBUG').upper()
log_format = settings.get('log_format', default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
logging.basicConfig(format=log_format, level=logging.getLevelName(log_level))

if A.classifier_info:
20 changes: 11 additions & 9 deletions ycml/scripts/featurize.py
@@ -10,8 +10,10 @@

from uriutils import URIFileType

from ycsettings import Settings

from ..featurizers import load_featurizer, load_featurized, save_featurized
from ..utils import load_instances, load_dictionary_from_file, get_settings, parse_n_jobs, get_class_from_module_path
from ..utils import load_instances, get_class_from_module_path

__all__ = []

@@ -23,7 +25,7 @@ def main():
parser.add_argument('featurizer_type', type=str, metavar='<featurizer_type>', nargs='?', default=None, help='Name of featurizer model to use.')
parser.add_argument('-i', '--instances', type=URIFileType('r'), nargs='*', default=[], metavar='<instances>', help='List of instance files to featurize.')
parser.add_argument('-o', '--output', type=URIFileType('wb'), metavar='<features_uri>', help='Save featurized instances here.')
parser.add_argument('-s', '--settings', type=URIFileType(), metavar='<settings_uri>', help='Settings file to configure models.')
parser.add_argument('-s', '--settings', dest='settings_uri', type=URIFileType(), metavar='<settings_uri>', help='Settings file to configure models.')
parser.add_argument('--n-jobs', type=int, metavar='<N>', help='No. of processes to use during featurization.')
parser.add_argument('--log-level', type=str, metavar='<log_level>', help='Set log level of logger.')
parser.add_argument('--shuffle', action='store_true', help='Shuffle ordering of instances before writing them to file.')
@@ -36,17 +38,17 @@
group.add_argument('-v', '--verify', type=URIFileType(), metavar=('<featurizer_uri>', '<featurized_uri>'), nargs=2, help='Verify that the featurized instance file came from the same featurizer model.')
A = parser.parse_args()

file_settings = load_dictionary_from_file(A.settings) if A.settings else {}
settings = Settings(A, search_first=['env', 'env_settings_uri'])

log_level = get_settings(key='log_level', sources=(A, 'env', file_settings), default='DEBUG').upper()
log_format = get_settings(key='log_format', sources=(A, 'env', file_settings), default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
log_level = settings.get('log_level', default='DEBUG').upper()
log_format = settings.get('log_format', default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
logging.basicConfig(format=log_format, level=logging.getLevelName(log_level))

featurizer_type = get_settings(key='featurizer_type', sources=(A, 'env', file_settings))
featurizer_parameters = get_settings(key='featurizer_parameters', sources=(file_settings, ), default={})
featurizer_parameters['n_jobs'] = parse_n_jobs(get_settings(key='n_jobs', sources=(A, 'env', featurizer_parameters, file_settings), default=1))
featurizer_type = settings.get('featurizer_type')
featurizer_parameters = settings.get('featurizer_parameters', default={})
featurizer_parameters['n_jobs'] = settings.getnjobs('n_jobs', default=1)

labels_field = get_settings(key='labels_field', sources=('env', file_settings), default='labels')
labels_field = settings.get('labels_field', default='labels')
logger.debug('Using "{}" for labels field.'.format(labels_field))

if A.instances:
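
A short sketch of the accessors featurize.py now relies on; getnjobs replaces the old parse_n_jobs(get_settings(...)) combination (the values are illustrative):

    from ycsettings import Settings

    # Illustrative stand-in for the merged CLI args, environment, and settings file.
    settings = Settings({'featurizer_parameters': {'min_df': 2}, 'n_jobs': 4},
                        search_first=['env', 'env_settings_uri'])

    featurizer_parameters = settings.get('featurizer_parameters', default={})
    featurizer_parameters['n_jobs'] = settings.getnjobs('n_jobs', default=1)
    labels_field = settings.get('labels_field', default='labels')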
14 changes: 8 additions & 6 deletions ycml/scripts/predict.py
@@ -5,7 +5,9 @@

from uriutils import URIFileType

from ..utils import load_dictionary_from_file, load_instances, get_settings
from ycsettings import Settings

from ..utils import load_dictionary_from_file, load_instances
from ..featclass import load_featclass

logger = logging.getLogger(__name__)
@@ -15,20 +17,20 @@ def main():
parser = ArgumentParser(description='Predict instances using ML classifier.')
parser.add_argument('-s', '--settings', type=URIFileType(), metavar='<settings_file>', help='Settings file to configure models.')
parser.add_argument('instances', type=URIFileType('r'), metavar='<instances>', help='Instances to use for prediction.')
parser.add_argument('--featclass', type=URIFileType(), metavar='<featclass_uri>', help='Featclass configuration file to use for prediction.')
parser.add_argument('--featclass', type=URIFileType('r'), metavar='<featclass_uri>', help='Featclass configuration file to use for prediction.')
parser.add_argument('-p', '--probabilities', action='store_true', help='Also save prediction probabilities.')
parser.add_argument('-o', '--output', type=URIFileType('w'), default=sys.stdout.buffer, help='Save predictions to this file.')

A = parser.parse_args()

file_settings = load_dictionary_from_file(A.settings) if A.settings else {}
settings = Settings(A)

log_level = get_settings(key='log_level', sources=(A, 'env', file_settings), default='DEBUG').upper()
log_format = get_settings(key='log_format', sources=(A, 'env', file_settings), default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
log_level = settings.get('log_level', default='DEBUG').upper()
log_format = settings.get('log_format', default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
logging.basicConfig(format=log_format, level=logging.getLevelName(log_level))

if A.featclass: featclass = load_featclass(settings=load_dictionary_from_file(A.featclass))
else: featclass = load_featclass(settings=file_settings, uri=get_settings(key='featclass_uri', sources=('env', file_settings)))
else: featclass = load_featclass(uri=settings.get('featclass_uri'))

for count, args in enumerate(featclass.predictions_generator(load_instances(A.instances, labels_field=None), include_proba=A.probabilities, unbinarized=True), start=1):
if A.probabilities:
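
predict.py now has two ways of locating its featclass, following the branch above; a rough sketch under assumed, illustrative values:

    from ycml.featclass import load_featclass

    # Path 1: --featclass points at a config file whose parsed contents (a dict with
    # featclass_type and featclass_parameters) are handed over as the settings source.
    featclass = load_featclass(settings={'featclass_type': 'mypackage.featclass.MyFeatclass',  # illustrative
                                         'featclass_parameters': {}})

    # Path 2: no --featclass; the URI comes out of the merged settings and
    # load_featclass loads the featclass configuration found there (illustrative URI).
    featclass = load_featclass(uri='s3://models/featclass.yaml')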
19 changes: 10 additions & 9 deletions ycml/scripts/train.py
@@ -3,16 +3,18 @@

from uriutils import URIFileType

from ycsettings import Settings

from ..featurizers import load_featurized
from ..utils import load_dictionary_from_file, get_settings, get_class_from_module_path
from ..utils import get_class_from_module_path

logger = logging.getLogger(__name__)


def main():
parser = ArgumentParser(description='Classify instances using ML classifier.')
parser.add_argument('--log-level', type=str, metavar='<log_level>', help='Set log level of logger.')
parser.add_argument('-s', '--settings', type=URIFileType(), metavar='<settings_file>', help='Settings file to configure models.')
parser.add_argument('-s', '--settings', dest='settings_uri', type=URIFileType(), metavar='<settings_file>', help='Settings file to configure models.')
parser.add_argument('--n-jobs', type=int, metavar='<N>', help='No. of processor cores to use.')

parser.add_argument('classifier_type', type=str, metavar='<classifier_type>', nargs='?', help='Type of classifier model to fit.')
@@ -24,15 +26,14 @@

A = parser.parse_args()

file_settings = load_dictionary_from_file(A.settings) if A.settings else {}

log_level = get_settings(key='log_level', sources=(A, 'env', file_settings), default='DEBUG').upper()
log_format = get_settings(key='log_format', sources=(A, 'env', file_settings), default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
settings = Settings(A)
log_level = settings.get('log_level', default='DEBUG').upper()
log_format = settings.get('log_format', default='%(asctime)-15s [%(name)s-%(process)d] %(levelname)s: %(message)s')
logging.basicConfig(format=log_format, level=logging.getLevelName(log_level))

classifier_type = get_settings(key='classifier_type', sources=(A, 'env', file_settings))
classifier_parameters = get_settings((file_settings, 'classifier_parameters'), default={})
classifier_parameters['n_jobs'] = get_settings(key='n_jobs', sources=(A, 'env', classifier_parameters, file_settings), default=1)
classifier_type = settings.get('classifier_type')
classifier_parameters = settings.get('classifier_parameters', default={})
classifier_parameters['n_jobs'] = settings.getnjobs('n_jobs', default=1)

classifier_class = get_class_from_module_path(classifier_type)
if not classifier_class: parser.error('Unknown classifier name "{}".'.format(classifier_type))
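
To make the train.py flow concrete, a sketch of how the classifier is assembled from the merged settings (the class path and parameters are illustrative; only calls already present in this commit are used):

    from ycsettings import Settings

    from ycml.utils import get_class_from_module_path

    # Illustrative in-memory source standing in for CLI args, environment, and settings file.
    settings = Settings({
        'classifier_type': 'sklearn.linear_model.LogisticRegression',  # illustrative class path
        'classifier_parameters': {'C': 1.0},
        'n_jobs': 2,
    })

    classifier_type = settings.get('classifier_type')
    classifier_parameters = settings.get('classifier_parameters', default={})
    classifier_parameters['n_jobs'] = settings.getnjobs('n_jobs', default=1)

    classifier_class = get_class_from_module_path(classifier_type)
    classifier = classifier_class(**classifier_parameters)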
100 changes: 1 addition & 99 deletions ycml/utils/settings.py
@@ -1,4 +1,4 @@
__all__ = ['load_dictionary_from_file', 'save_dictionary_to_file', 'get_settings', 'bulk_get_settings']
__all__ = ['load_dictionary_from_file', 'save_dictionary_to_file']

import json
import logging
@@ -52,101 +52,3 @@ def save_dictionary_to_file(f, d, *, force_format=None, title='dictionary', **kw

logger.info('Saved {} {} to <{}>.'.format(ext[1:].upper(), title, f.name))
#end def


ENV_SOURCE_KEYS = ['env', 'environment', 'ENV']


def get_settings(*source_key_pairs, key=None, keys=None, sources=None, default=None, raise_on_missing=None, parse_string_func=None, auto_parse=False):
source_key_pairs = list(source_key_pairs)
if key and sources:
source_key_pairs += [(src, key) for src in sources]

if keys and sources:
source_key_pairs += [(src, k) for src in sources for k in keys]

for src, key in source_key_pairs:
v = None
for key_ in (key, key.upper() if not key.isupper() else None, key.lower() if not key.islower() else None):
if key_ is None: continue

if hasattr(src, key_): v = getattr(src, key_)
elif hasattr(src, 'get'): v = src.get(key_)
elif src in ENV_SOURCE_KEYS: v = os.environ.get(key_)

if v is not None:
if isinstance(v, str):
if parse_string_func: return parse_string_func(v)
elif auto_parse: return _auto_parse(v)
#end if

return v
#end if
#end for
#end for

if raise_on_missing:
raise ValueError('Unable to find setting [{}].'.format(', '.join(sorted(set(key for _, key in source_key_pairs)))))

return default
#end def


def _auto_parse(s):
try: return int(s)
except ValueError: pass

try: return float(s)
except ValueError: pass

try: return json.loads(s)
except json.JSONDecodeError: pass

return s
#end def


def bulk_get_settings(*sources, normalize_func=None, auto_parse=False):
bulk_settings = {}
chosen_sources = {}

def _normalize_key(k): # settings key are always lowercased. IRregardless.
k = k.lower()
normalized = normalize_func(k)
return normalized if normalized else k
#end def

for i, src in enumerate(sources):
if src in ENV_SOURCE_KEYS:
for k, v in os.environ.items():
norm_k = _normalize_key(k)
bulk_settings[norm_k] = v
chosen_sources[norm_k] = 'env'
#end for
elif hasattr(src, 'items'):
for k, v in src.items():
norm_k = _normalize_key(k)
bulk_settings[norm_k] = v
chosen_sources[norm_k] = 'dict_{}'.format(i)
#end for

else:
for k in filter(lambda a: not a.startswith('_'), dir(src)):
v = getattr(src, k)
if callable(v): continue # skip functions, we only want values

norm_k = _normalize_key(k)
bulk_settings[norm_k] = v
chosen_sources[norm_k] = 'obj_{}'.format(i)
#end for
#end if
#end for

if auto_parse:
for k, v in bulk_settings.items():
if isinstance(v, str):
bulk_settings[k] = _auto_parse(v)
#end if

return bulk_settings, chosen_sources
#end def
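
For reference, a rough mapping of the removed get_settings helper onto the ycsettings calls adopted throughout this commit; whether search_first gives the environment the same priority as the old ('env', file_settings) ordering is an assumption based on the changes above:

    from ycsettings import Settings

    file_settings = {'log_level': 'INFO', 'n_jobs': 2}  # illustrative settings dictionary

    # Old style (removed above):
    #   log_level = get_settings(key='log_level', sources=('env', file_settings), default='DEBUG')
    #   n_jobs    = get_settings(key='n_jobs', sources=('env', file_settings), default=1)
    # New style:
    settings = Settings(file_settings, search_first=['env', 'env_settings_uri'])
    log_level = settings.get('log_level', default='DEBUG')
    n_jobs = settings.getnjobs('n_jobs', default=1)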
