Skip to content
This repository has been archived by the owner on May 25, 2020. It is now read-only.

Commit

Permalink
Python 3 Compatibility (#10)
Browse files Browse the repository at this point in the history
* Python 2+3 compatibility
  • Loading branch information
Oxylibrium authored and nikita-smetanin committed Jun 15, 2018
1 parent d46c3ef commit ec68708
Show file tree
Hide file tree
Showing 35 changed files with 248 additions and 52 deletions.
4 changes: 3 additions & 1 deletion cakechat/api/response.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import random

from six.moves import xrange, map

from cakechat.api.config import PREDICTION_MODE, NUM_BEST_CANDIDATES_TO_PICK_FROM, SAMPLING_ATTEMPTS_NUM, \
DEFAULT_RESPONSE
from cakechat.config import INPUT_CONTEXT_SIZE, INPUT_SEQUENCE_LENGTH, PREDICTION_MODES
Expand Down Expand Up @@ -53,7 +55,7 @@ def get_response(dialog_context, emotion):
:param emotion: emotion to condition response
:return: dialog response conditioned on input emotion
"""
tokenized_dialog_context = map(get_tokens_sequence, dialog_context)
tokenized_dialog_context = list(map(get_tokens_sequence, dialog_context))
tokenized_dialog_contexts = [tokenized_dialog_context]
context_tokens_ids = transform_contexts_to_token_ids(tokenized_dialog_contexts, _cakechat_model.token_to_index,
INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE)
Expand Down
5 changes: 3 additions & 2 deletions cakechat/api/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from flask import jsonify
from six import text_type


def get_api_error_response(message, code, logger):
Expand All @@ -7,8 +8,8 @@ def get_api_error_response(message, code, logger):


def _is_list_of_unicode_strings(data):
return (isinstance(data, list) or isinstance(data, tuple)) and len(data) > 0 \
and all(isinstance(s, unicode) for s in data)
return bool(data and isinstance(data, (list, tuple)) and
all(isinstance(s, text_type) for s in data))


def parse_dataset_param(params, param_name, required=True):
Expand Down
4 changes: 2 additions & 2 deletions cakechat/api/v1/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ def get_model_response():
try:
dialog_context = parse_dataset_param(params, param_name='context')
except KeyError as e:
return get_api_error_response('Malformed request, no "%s" param was found' % e.message, 400, _logger)
return get_api_error_response('Malformed request, no "%s" param was found' % str(e), 400, _logger)
except ValueError as e:
return get_api_error_response('Malformed request: %s' % e.message, 400, _logger)
return get_api_error_response('Malformed request: %s' % str(e), 400, _logger)

emotion = params.get('emotion', DEFAULT_CONDITION)
if emotion not in EMOTIONS_TYPES:
Expand Down
5 changes: 3 additions & 2 deletions cakechat/dialog_model/inference/candidates/beamsearch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from itertools import izip_longest
from six.moves import zip_longest

import numpy as np
from six.moves import xrange
import theano

from cakechat.dialog_model.inference.candidates.abstract_generator import AbstractCandidatesGenerator
Expand Down Expand Up @@ -248,7 +249,7 @@ def _generate_candidates_for_one_context(self, condition_id, output_seq_len):

@timer
def generate_candidates(self, context_token_ids, condition_ids, output_seq_len):
x_with_conditions_batch = izip_longest(context_token_ids, condition_ids if condition_ids is not None else [])
x_with_conditions_batch = zip_longest(context_token_ids, condition_ids if condition_ids is not None else [])
result = []
for x, condition_id in x_with_conditions_batch:
self._compute_thought_vectors(x)
Expand Down
1 change: 1 addition & 0 deletions cakechat/dialog_model/inference/candidates/sampling.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from six.moves import xrange
import theano

from cakechat.dialog_model.inference.candidates.abstract_generator import AbstractCandidatesGenerator
Expand Down
3 changes: 2 additions & 1 deletion cakechat/dialog_model/inference/predict.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from six.moves import xrange

from cakechat.config import MAX_PREDICTIONS_LENGTH, BEAM_SIZE, MMI_REVERSE_MODEL_SCORE_WEIGHT, DEFAULT_TEMPERATURE, \
SAMPLES_NUM_FOR_RERANKING, PREDICTION_MODES, REPETITION_PENALIZE_COEFFICIENT
Expand Down Expand Up @@ -98,7 +99,7 @@ def get_nn_responses(context_token_ids,
response_tokens_ids = np.reshape(response_tokens_ids, (-1, output_seq_len))
response_tokens = transform_token_ids_to_sentences(response_tokens_ids, nn_model.index_to_token)

lines_num = len(response_tokens) / output_candidates_num
lines_num = len(response_tokens) // output_candidates_num
responses = [response_tokens[i * output_candidates_num:(i + 1) * output_candidates_num] for i in xrange(lines_num)]

return responses
1 change: 1 addition & 0 deletions cakechat/dialog_model/inference/predictor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from six.moves import xrange


class Predictor(object):
Expand Down
5 changes: 3 additions & 2 deletions cakechat/dialog_model/inference/reranking.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from abc import ABCMeta, abstractmethod
from itertools import izip_longest
from six.moves import zip_longest

import numpy as np
from six.moves import xrange

from cakechat.dialog_model.inference.service_tokens import ServiceTokensIDs
from cakechat.dialog_model.inference.utils import get_sequence_score_by_thought_vector, get_sequence_score, \
Expand Down Expand Up @@ -103,7 +104,7 @@ def rerank_candidates(self, contexts, all_candidates, condition_ids):
condition_ids = [] if condition_ids is None else condition_ids # For zip_longest
candidates_scores = [
self._compute_candidates_scores(context, candidates, condition_id)
for context, candidates, condition_id in izip_longest(contexts, all_candidates, condition_ids)
for context, candidates, condition_id in zip_longest(contexts, all_candidates, condition_ids)
]
scores_order = [np.argsort(-np.array(scores)) for scores in candidates_scores]
batch_size = len(contexts)
Expand Down
1 change: 1 addition & 0 deletions cakechat/dialog_model/inference/tests/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import unittest
import numpy as np
from six.moves import xrange

sys.path.append(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))))
Expand Down
1 change: 1 addition & 0 deletions cakechat/dialog_model/inference/tests/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
from scipy.stats import binom
from six.moves import xrange

sys.path.append(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))))
Expand Down
1 change: 1 addition & 0 deletions cakechat/dialog_model/inference/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from six.moves import xrange

from cakechat.config import BATCH_SIZE, DEFAULT_CONDITION
from cakechat.dialog_model.model_utils import get_training_batch
Expand Down
1 change: 1 addition & 0 deletions cakechat/dialog_model/layers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import theano.tensor as T
from lasagne.layers.base import MergeLayer, Layer
from six.moves import xrange


class RepeatLayer(Layer):
Expand Down
5 changes: 4 additions & 1 deletion cakechat/dialog_model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import lasagne
import numpy as np
from six.moves import xrange
import theano
import theano.tensor as T
from lasagne.init import Normal
Expand Down Expand Up @@ -583,8 +584,10 @@ def print_matrices_weights(self):
total_network_size = 0
for p, v in zip(params, values):
param_size = float(v.nbytes) / 1024 / 1024
# Work around numpy/python 3 regression:
# http://www.markhneedham.com/blog/2017/11/19/python-3-typeerror-unsupported-format-string-passed-to-numpy-ndarray-__format__/
laconic_logger.info('\t{0:<40} dtype: {1:<10} shape: {2:<12} size: {3:<.2f}M'.format(
p.name, v.dtype, v.shape, param_size))
p.name, repr(v.dtype), repr(v.shape), param_size))
total_network_size += param_size
laconic_logger.info('Total network size: {0:.1f} Mb'.format(total_network_size))

Expand Down
20 changes: 11 additions & 9 deletions cakechat/dialog_model/model_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
from collections import namedtuple
from itertools import imap, islice, izip
from itertools import islice

import numpy as np
from six import text_type
from six.moves import xrange, map, zip

from cakechat.config import BASE_CORPUS_NAME, TRAIN_CORPUS_NAME, WORD_EMBEDDING_DIMENSION, INPUT_CONTEXT_SIZE, \
HIDDEN_LAYER_DIMENSION, ENCODER_DEPTH, DECODER_DEPTH, INPUT_SEQUENCE_LENGTH, \
Expand All @@ -21,7 +23,7 @@


def transform_conditions_to_ids(conditions, condition_to_index, n_dialogs):
condition_ids_iterator = imap(
condition_ids_iterator = map(
lambda condition: condition_to_index.get(condition, condition_to_index[DEFAULT_CONDITION]), conditions)
condition_ids = np.full(n_dialogs, condition_to_index[DEFAULT_CONDITION], dtype=np.int32)
for sample_idx, condition_id in enumerate(condition_ids_iterator):
Expand Down Expand Up @@ -143,7 +145,7 @@ def transform_token_ids_to_sentences(y_ids, index_to_token):
response_tokens.append(token_to_add)

response_str = ' '.join(response_tokens)
if not isinstance(response_str, unicode):
if not isinstance(response_str, text_type):
response_str = response_str.decode('utf-8')

responses.append(response_str)
Expand Down Expand Up @@ -178,7 +180,7 @@ def transform_context_token_ids_to_sentences(x_ids, index_to_token):
sample_tokens.append(token_to_add)

sample_str = ' '.join(sample_tokens)
if not isinstance(sample_str, unicode):
if not isinstance(sample_str, text_type):
sample_str = sample_str.decode('utf-8')

context_samples.append(sample_str)
Expand Down Expand Up @@ -235,7 +237,7 @@ def get_w2v_embedding_matrix(tokenized_dialog_lines, index_to_token, add_start_e

def get_training_batch(inputs, batch_size, random_permute=False):
n_samples = inputs[0].shape[0]
n_batches = n_samples / batch_size
n_batches = n_samples // batch_size
batches_seq = np.arange(n_batches)
samples_seq = np.arange(n_samples)

Expand Down Expand Up @@ -312,7 +314,7 @@ def reverse_nn_input(dataset, service_tokens):
"""
# Swap last utterance of x with y, while padding with start- and eos-tokens
y_output = np.full(dataset.y.shape, service_tokens.pad_token_id, dtype=dataset.y.dtype)
for y_output_sample, x_input_sample in izip(y_output, dataset.x[:, -1]):
for y_output_sample, x_input_sample in zip(y_output, dataset.x[:, -1]):
# Write start token at the first index
y_output_sample[0] = service_tokens.start_token_id
y_output_token_index = 1
Expand All @@ -330,7 +332,7 @@ def reverse_nn_input(dataset, service_tokens):

# Use utterances from y in x while truncating start- and eos-tokens
x_output = np.full(dataset.x.shape, service_tokens.pad_token_id, dtype=dataset.x.dtype)
for x_output_sample, x_input_sample, y_input_sample in izip(x_output, dataset.x[:, :-1], dataset.y):
for x_output_sample, x_input_sample, y_input_sample in zip(x_output, dataset.x[:, :-1], dataset.y):
# Copy all the context utterances except the last one right to the output
x_output_sample[:-1] = x_input_sample
x_output_token_index = 0
Expand Down Expand Up @@ -359,7 +361,7 @@ def _get_x_data_iterator_with_context(x_data_iterator, y_data_iterator, context_
context = []

last_y_line = None
for x_line, y_line in izip(x_data_iterator, y_data_iterator):
for x_line, y_line in zip(x_data_iterator, y_data_iterator):
if x_line != last_y_line:
context = [] # clear context if last response != current dialog context (new dialog)

Expand All @@ -380,7 +382,7 @@ def transform_lines_to_nn_input(tokenized_dialog_lines, token_to_index):

x_data_iterator = islice(x_data_iterator, 0, None, 2)
y_data_iterator = islice(y_data_iterator, 1, None, 2)
n_dialogs /= 2
n_dialogs //= 2

y_data_iterator, y_data_iterator_for_context = file_buffered_tee(y_data_iterator)
x_data_iterator = _get_x_data_iterator_with_context(x_data_iterator, y_data_iterator_for_context)
Expand Down
12 changes: 10 additions & 2 deletions cakechat/dialog_model/quality/logging.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import os
import subprocess
import sys
import time
from collections import namedtuple
from datetime import datetime

import unicodecsv as csv
from six.moves import xrange

# UnicodeCSV requires files to be opened as binary on Python3 by design.
# https://github.com/jdunck/python-unicodecsv/issues/65
if sys.version_info[0] == 2:
import unicodecsv as csv
else:
import csv

from cakechat.config import DATA_DIR, PREDICTION_MODE_FOR_TESTS, LOG_CANDIDATES_NUM, MAX_PREDICTIONS_LENGTH
from cakechat.dialog_model.inference import get_nn_responses
Expand Down Expand Up @@ -54,7 +62,7 @@ def _get_iteration_stats(stats_info):


def init_csv_writer(fh, mode, output_seq_len):
csv_writer = csv.writer(fh, encoding='utf-8', delimiter='\t')
csv_writer = csv.writer(fh, delimiter='\t')
csv_writer.writerow(['']) # empty row for better readability
csv_writer.writerow([_NN_MODEL_PARAMS_STR])
csv_writer.writerow(['commit hash: %s' % _get_git_revision_short_hash()])
Expand Down
2 changes: 2 additions & 0 deletions cakechat/dialog_model/quality/metrics/distinctness.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import numpy as np
from six.moves import xrange


from cakechat.config import PREDICTION_MODE_FOR_TESTS, DEFAULT_TEMPERATURE, BEAM_SIZE, \
PREDICTION_DISTINCTNESS_NUM_TOKENS
Expand Down
2 changes: 1 addition & 1 deletion cakechat/dialog_model/quality/metrics/ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def compute_average_precision(expected_answers, weighted_actual_answers, top):
actual_responses, actual_weights = zip(*weighted_actual_answers.items())

expected_labels = map(lambda response: int(response in expected_answers), actual_responses)[:top]
expected_labels = [int(response in expected_answers) for response in actual_responses][:top]
actual_weights = actual_weights[:top]

if any(expected_labels):
Expand Down
2 changes: 2 additions & 0 deletions cakechat/dialog_model/train.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import time

from six.moves import xrange

from cakechat.config import MAX_PREDICTIONS_LENGTH, BATCH_SIZE, EPOCHES_NUM, LOG_FREQUENCY_PER_BATCHES, \
SCREEN_LOG_FREQUENCY_PER_BATCHES, SCREEN_LOG_NUM_TEST_LINES, SHUFFLE_TRAINING_BATCHES, PREDICTION_MODE_FOR_TESTS, \
PREDICTION_MODES, LOG_CANDIDATES_NUM, VAL_SUBSET_SIZE, LOG_LOSS_DECAY
Expand Down
4 changes: 2 additions & 2 deletions cakechat/utils/files_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import codecs
from abc import abstractmethod, ABCMeta
import cPickle as pickle
from six.moves import cPickle as pickle

from cakechat.utils.logger import get_logger

Expand Down Expand Up @@ -50,7 +50,7 @@ def load_file(file_path, filter_empty_lines=True):
with codecs.open(file_path, 'r', 'utf-8') as fh:
lines = [line.strip() for line in fh.readlines()]
if filter_empty_lines:
lines = filter(None, lines)
lines = list(filter(None, lines))

return lines

Expand Down
5 changes: 4 additions & 1 deletion cakechat/utils/offense_detector/detector.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import nltk
from six import string_types
from six.moves import xrange


from cakechat.utils.files_utils import load_file
from cakechat.utils.text_processing import get_tokens_sequence
Expand All @@ -25,7 +28,7 @@ def _get_ngrams(self, tokenized_line):
return flatten(ngrams, constructor=set)

def has_offensive_ngrams(self, text_or_tokenized_text):
if isinstance(text_or_tokenized_text, basestring):
if isinstance(text_or_tokenized_text, string_types):
tokenized_text = get_tokens_sequence(text_or_tokenized_text)
elif isinstance(text_or_tokenized_text, list):
tokenized_text = text_or_tokenized_text
Expand Down
2 changes: 1 addition & 1 deletion cakechat/utils/s3/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,6 @@ def _resolve(self):
bucket.download(remote_path, self._file_path)
return True
except Exception as e:
self._logger.warn('File can not be downloaded from AWS S3 because: %s' % e.message)
self._logger.warn('File can not be downloaded from AWS S3 because: %s' % str(e))

return False
8 changes: 6 additions & 2 deletions cakechat/utils/tee_file.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import cPickle as pickle
import os
import tempfile

from six.moves import xrange
from six.moves import cPickle as pickle

# Pickle on HIGHEST_PROTOCOL breaks on Python 3.6.5
_PICKLE_PROTOCOL = 2

def _pickle_iterable(filename, iterable):
with open(filename, 'wb') as pickle_fh:
pklr = pickle.Pickler(pickle_fh, pickle.HIGHEST_PROTOCOL)
pklr = pickle.Pickler(pickle_fh, _PICKLE_PROTOCOL)
for entry in iterable:
pklr.dump(entry)
pklr.clear_memo()
Expand Down
3 changes: 2 additions & 1 deletion cakechat/utils/telegram_bot_client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import ABCMeta, abstractmethod
from six import iteritems

import telepot
import telepot.loop
Expand Down Expand Up @@ -74,7 +75,7 @@ def _bot_info():

def _send_bot_help(self, _):
help_lines = [self._bot_info(), '', 'List of available commands:']
for command, (_, description) in self._command_to_handler.iteritems():
for command, (_, description) in iteritems(self._command_to_handler):
help_lines.append('/{} - {}'.format(command, description))

return self._send_text('\n'.join(help_lines))
Expand Down
8 changes: 4 additions & 4 deletions cakechat/utils/text_processing/dialog.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from itertools import imap
from six.moves import map
from operator import itemgetter

from cakechat.utils.tee_file import file_buffered_tee
Expand All @@ -24,9 +24,9 @@ def get_dialog_lines_and_conditions(dialog_lines, text_field_name, condition_fie
Splits one dialog_lines generator into two generators - one for conditions and one for dialog lines
"""
conditions_iter, dialog_lines_iter = file_buffered_tee(
imap(lambda line: [line[condition_field_name], line[text_field_name]], dialog_lines))
conditions_iter = imap(itemgetter(0), conditions_iter)
dialog_lines_iter = imap(itemgetter(1), dialog_lines_iter)
map(lambda line: [line[condition_field_name], line[text_field_name]], dialog_lines))
conditions_iter = map(itemgetter(0), conditions_iter)
dialog_lines_iter = map(itemgetter(1), dialog_lines_iter)
return dialog_lines_iter, conditions_iter


Expand Down
Loading

0 comments on commit ec68708

Please sign in to comment.