diff --git a/.gitignore b/.gitignore
index 2280041c..412888c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@ dist/*
 checkpoint
 htmlcov
 mnist
+/.vs
diff --git a/examples/2_BasicModels/word2vec.py b/examples/2_BasicModels/word2vec.py
index 094fca8c..923d8291 100644
--- a/examples/2_BasicModels/word2vec.py
+++ b/examples/2_BasicModels/word2vec.py
@@ -48,7 +48,7 @@
 data_path = 'text8.zip'
 if not os.path.exists(data_path):
     print("Downloading the dataset... (It may take some time)")
-    filename, _ = urllib.urlretrieve(url, data_path)
+    filename, _ = urllib.request.urlretrieve(url, data_path)
     print("Done!")
 # Unzip the dataset file. Text has already been processed
 with zipfile.ZipFile(data_path) as f:
@@ -156,6 +156,7 @@ def next_batch(batch_size, num_skips, skip_window):
 embedding_norm = embedding / tf.sqrt(tf.reduce_sum(tf.square(embedding), 1, keepdims=True))
 cosine_sim_op = tf.matmul(X_embed_norm, embedding_norm, transpose_b=True)
 
+tf.train.export_meta_graph(filename='word2vec.meta')
 # Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()
 
@@ -165,16 +166,16 @@ def next_batch(batch_size, num_skips, skip_window):
     sess.run(init)
 
     # Testing data
-    x_test = np.array([word2id[w] for w in eval_words])
+    x_test = np.array([word2id.get(w.encode(), 0) for w in eval_words])
 
     average_loss = 0
-    for step in xrange(1, num_steps + 1):
+    for step in range(1, num_steps + 1):
         # Get a new batch of data
         batch_x, batch_y = next_batch(batch_size, num_skips, skip_window)
         # Run training op
         _, loss = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
         average_loss += loss
-        
+
         if step % display_step == 0 or step == 1:
             if step > 1:
                 average_loss /= display_step
@@ -186,10 +187,10 @@ def next_batch(batch_size, num_skips, skip_window):
         if step % eval_step == 0 or step == 1:
             print("Evaluation...")
            sim = sess.run(cosine_sim_op, feed_dict={X: x_test})
-            for i in xrange(len(eval_words)):
+            for i in range(len(eval_words)):
                 top_k = 8  # number of nearest neighbors
                 nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                 log_str = '"%s" nearest neighbors:' % eval_words[i]
-                for k in xrange(top_k):
+                for k in range(top_k):
                     log_str = '%s %s,' % (log_str, id2word[nearest[k]])
                 print(log_str)
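
A note on the first word2vec.py hunk: under Python 3, urlretrieve lives in the urllib.request submodule, and a bare "import urllib" does not make that submodule reachable. The hunk does not show the script's import block, so this is an assumption, but the patched call only works if the imports look roughly like the sketch below.

    # Minimal Python 3 sketch; the URL is assumed from the script's download
    # message and is not shown in this hunk.
    import os
    import urllib.request  # "import urllib" alone leaves .request unbound

    url = 'http://mattmahoney.net/dc/text8.zip'
    data_path = 'text8.zip'
    if not os.path.exists(data_path):
        filename, _ = urllib.request.urlretrieve(url, data_path)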
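
The new tf.train.export_meta_graph call serializes the graph definition to word2vec.meta before training begins. A minimal sketch of reading that file back in a fresh process, assuming TensorFlow 1.x:

    import tensorflow as tf

    with tf.Graph().as_default() as g:
        # import_meta_graph rebuilds the exported graph into g and returns a
        # Saver that could additionally restore variables from a checkpoint.
        saver = tf.train.import_meta_graph('word2vec.meta')
        print('%d ops restored' % len(g.get_operations()))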
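
The word2id change in the third hunk guards against two Python 3 issues at once: text read through zipfile comes back as bytes, so a vocabulary built from it is keyed by bytes rather than str, and dict.get with a default of 0 avoids a KeyError for any evaluation word that fell outside the vocabulary. A minimal sketch, assuming the vocabulary is keyed by bytes as in the surrounding script and that id 0 is the unknown-word bucket:

    words = b'five four three two one'.split()      # zipfile hands back bytes
    word2id = {w: i for i, w in enumerate(words)}   # keys are bytes, not str
    word2id.get('three'.encode(), 0)                # -> 2 (a str key would miss)
    word2id.get('missing'.encode(), 0)              # -> 0, the assumed UNK id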