From 30b279544381240586a01087a2f418ed3e8589c7 Mon Sep 17 00:00:00 2001 From: Nick Dingwall Date: Tue, 16 Apr 2019 09:40:58 -0700 Subject: [PATCH 01/11] Embed weights in cost function --- mittens/tf_mittens.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index eb61d4e..a2416fb 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -58,7 +58,7 @@ def _fit(self, X, weights, log_coincidence, self._build_graph(vocab, initial_embedding_dict) # Optimizer set-up: - self.cost = self._get_cost_function() + self.cost = self._get_cost_function(weights, log_coincidence) self.optimizer = self._get_optimizer() # Set up logging for Tensorboard @@ -150,18 +150,20 @@ def _build_graph(self, vocab, initial_embedding_dict): self.bc = self._weight_init(self.n_words, 1, 'bc') self.model = tf.tensordot(self.W, tf.transpose(self.C), axes=1) + \ - tf.tensordot(self.bw, tf.transpose(self.ones), axes=1) + \ - tf.tensordot(self.ones, tf.transpose(self.bc), axes=1) + tf.tensordot(self.bw, tf.transpose(self.ones), axes=1) + \ + tf.tensordot(self.ones, tf.transpose(self.bc), axes=1) - def _get_cost_function(self): + def _get_cost_function(self, weights, log_coincidence): """Compute the cost of the Mittens objective function. If self.mittens = 0, this is the same as the cost of GloVe. """ - self.weights = tf.placeholder( - tf.float32, shape=[self.n_words, self.n_words]) - self.log_coincidence = tf.placeholder( - tf.float32, shape=[self.n_words, self.n_words]) + self.weights = tf.Variable(weights, + dtype=tf.float32, + trainable=False) + self.log_coincidence = tf.Variable(log_coincidence, + dtype=tf.float32, + trainable=False) self.diffs = tf.subtract(self.model, self.log_coincidence) cost = tf.reduce_sum( 0.5 * tf.multiply(self.weights, tf.square(self.diffs))) From 87786479ba3d8c3fcc280d3d204ec1f9beb34ef9 Mon Sep 17 00:00:00 2001 From: Nick Dingwall Date: Tue, 16 Apr 2019 09:41:29 -0700 Subject: [PATCH 02/11] Use optimized minimize function unless debugging --- mittens/tf_mittens.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index a2416fb..4532014 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -59,7 +59,7 @@ def _fit(self, X, weights, log_coincidence, # Optimizer set-up: self.cost = self._get_cost_function(weights, log_coincidence) - self.optimizer = self._get_optimizer() + self.optimizer = self._get_train_func() # Set up logging for Tensorboard if self.log_dir: @@ -184,17 +184,21 @@ def _tf_squared_euclidean(X, Y): """ return tf.reduce_sum(tf.pow(tf.subtract(X, Y), 2), axis=1) - def _get_optimizer(self): + def _get_train_func(self): """Uses Adagrad to optimize the GloVe/Mittens objective, as specified in the GloVe paper. """ optim = tf.train.AdagradOptimizer(self.learning_rate) - gradients = optim.compute_gradients(self.cost) - if self.log_dir: - for name, (g, v) in zip(['W', 'C', 'bw', 'bc'], gradients): - tf.summary.histogram("{}_grad".format(name), g) - tf.summary.histogram("{}_vals".format(name), v) - return optim.apply_gradients(gradients) + if self.DEBUG: + gradients = optim.compute_gradients(self.cost) + if self.log_dir: + for name, (g, v) in zip(['W', 'C', 'bw', 'bc'], gradients): + tf.summary.histogram("{}_grad".format(name), g) + tf.summary.histogram("{}_vals".format(name), v) + return optim.apply_gradients(gradients) + else: + return optim.minimize(self.cost, + global_step=tf.train.get_or_create_global_step()) def _weight_init(self, m, n, name): """ From 36903c28da42540d91ddaccd98abb1db516bc8cd Mon Sep 17 00:00:00 2001 From: Andrei Damian Date: Thu, 9 Apr 2020 09:12:59 +0300 Subject: [PATCH 03/11] test 1 --- mittens/tf_mittens.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index 6d3cf4f..3045ac3 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -212,24 +212,33 @@ class GloVe(Mittens, GloVeBase): second=_DESC.format(model=GloVeBase._MODEL)) -if __name__ == '__main__': +def _make_word_word_matrix(n=50): + """Returns a symmetric matrix where the entries are drawn from a + Poisson distribution""" + base = np.random.zipf(2, size=(n, n)) - 1 + return base + base.T - X = np.array([ - [10.0, 2.0, 3.0, 4.0], - [ 2.0, 10.0, 4.0, 1.0], - [ 3.0, 4.0, 10.0, 2.0], - [ 4.0, 1.0, 2.0, 10.0]]) +if __name__ == '__main__': + + - glove = GloVe(n=5, max_iter=5000) - G = glove.fit(X) +# X = np.array([ +# [10.0, 2.0, 3.0, 4.0], +# [ 2.0, 10.0, 4.0, 1.0], +# [ 3.0, 4.0, 10.0, 2.0], +# [ 4.0, 1.0, 2.0, 10.0]]) +# + X = _make_word_word_matrix(n=10000) + glove = GloVe(n=128, max_iter=5000) + G = glove.fit(X) - print("\nLearned vectors:") - print(G) + print("\nLearned vectors:") + print(G) - print("We expect the dot product of learned vectors " - "to be proportional to the co-occurrence counts. " - "Let's see how close we came:") + print("We expect the dot product of learned vectors " + "to be proportional to the co-occurrence counts. " + "Let's see how close we came:") - corr = np.corrcoef(G.dot(G.T).ravel(), X.ravel())[0][1] + corr = np.corrcoef(G.dot(G.T).ravel(), X.ravel())[0][1] - print("Pearson's R: {} ".format(corr)) + print("Pearson's R: {} ".format(corr)) From 309aa24de4f911ccbcbe2f582e51a4a4e686a80c Mon Sep 17 00:00:00 2001 From: Andrei Damian Date: Thu, 9 Apr 2020 09:24:27 +0300 Subject: [PATCH 04/11] verbosity --- mittens/mittens_base.py | 2 +- mittens/tf_mittens.py | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/mittens/mittens_base.py b/mittens/mittens_base.py index 815e529..439ab16 100644 --- a/mittens/mittens_base.py +++ b/mittens/mittens_base.py @@ -163,7 +163,7 @@ def _progressbar(self, msg, iter_num): if self.display_progress and \ (iter_num + 1) % self.display_progress == 0: sys.stderr.write('\r') - sys.stderr.write("Iteration {}: {}".format(iter_num + 1, msg)) + sys.stderr.write("Iteration {}: {}\t\t\t".format(iter_num + 1, msg)) sys.stderr.flush() def __repr__(self): diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index 3045ac3..386e3fb 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -17,10 +17,14 @@ except ImportError: import tensorflow as tf +from time import time + from mittens.mittens_base import randmatrix, noise from mittens.mittens_base import MittensBase, GloVeBase + + _FRAMEWORK = "TensorFlow" _DESC = """ This version is faster than the NumPy version. If you prefer NumPy @@ -76,6 +80,7 @@ def _fit(self, X, weights, log_coincidence, merged_logs = tf.summary.merge_all() for i in range(1, self.max_iter+1): + t1 = time() _, loss, stats = self.sess.run( [self.optimizer, self.cost, merged_logs], feed_dict={ @@ -86,17 +91,24 @@ def _fit(self, X, weights, log_coincidence, if self.log_dir and i % 10 == 0: log_writer.add_summary(stats) self.errors.append(loss) - + t2 = time() + t_elapsed = t2 - t1 if loss < self.tol: # Quit early if tolerance is met self._progressbar("stopping with loss < self.tol", i) break else: - self._progressbar("loss: {}".format(loss), i) + self._progressbar("loss: {}, time: {:.2f} s/itr".format( + loss, + t_elapsed), i) # Return the sum of the two learned matrices, as recommended # in the paper: - return self.sess.run(tf.add(self.W, self.C)) + return self._get_embeds() + + + def _get_embeds(self): + return self.sess.run(tf.add(self.W, self.C)) def _build_graph(self, vocab, initial_embedding_dict): """Builds the computatation graph. From 7e2fd38764f869591743c0823e569d143741160e Mon Sep 17 00:00:00 2001 From: Andrei Damian Date: Thu, 9 Apr 2020 09:55:34 +0300 Subject: [PATCH 05/11] updated to tf1/tf2 and finished GPU implementation --- mittens/tf_mittens.py | 58 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index 4532014..1a198ae 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -17,7 +17,15 @@ import os import numpy as np -import tensorflow as tf +# Try to accommodate TensorFlow v1 and v2: +try: + import tensorflow.compat.v1 as tf + tf.disable_eager_execution() +except ImportError: + import tensorflow as tf + +from time import time + from mittens.mittens_base import randmatrix, noise from mittens.mittens_base import MittensBase, GloVeBase @@ -49,6 +57,8 @@ def _fit(self, X, weights, log_coincidence, if fixed_initialization is not None: raise AttributeError("Tensorflow version of Mittens does " "not support specifying initializations.") + + self.DEBUG = False # Start the session: tf.reset_default_graph() @@ -78,23 +88,28 @@ def _fit(self, X, weights, log_coincidence, merged_logs = tf.summary.merge_all() for i in range(1, self.max_iter+1): + t1 = time() _, loss, stats = self.sess.run( [self.optimizer, self.cost, merged_logs], - feed_dict={ - self.weights: weights, - self.log_coincidence: log_coincidence}) +# feed_dict={ +# self.weights: weights, +# self.log_coincidence: log_coincidence +# } + ) # Keep track of losses if self.log_dir and i % 10 == 0: log_writer.add_summary(stats, global_step=i) self.errors.append(loss) - + t2 = time() + t_elapsed = t2 - t1 if loss < self.tol: # Quit early if tolerance is met self._progressbar("stopping with loss < self.tol", i) break else: - self._progressbar("loss: {}".format(loss), i) + self._progressbar("loss: {}, time: {:.2f} s/itr".format( + loss, t_elapsed), i) # Return the sum of the two learned matrices, as recommended # in the paper: @@ -218,3 +233,34 @@ class GloVe(Mittens, GloVeBase): __doc__ = GloVeBase.__doc__.format( framework=_FRAMEWORK, second=_DESC.format(model=GloVeBase._MODEL)) + +def _make_word_word_matrix(n=50): + """Returns a symmetric matrix where the entries are drawn from a + Poisson distribution""" + base = np.random.zipf(2, size=(n, n)) - 1 + return base + base.T + +if __name__ == '__main__': + + + +# X = np.array([ +# [10.0, 2.0, 3.0, 4.0], +# [ 2.0, 10.0, 4.0, 1.0], +# [ 3.0, 4.0, 10.0, 2.0], +# [ 4.0, 1.0, 2.0, 10.0]]) +# + X = _make_word_word_matrix(n=10000) + glove = GloVe(n=128, max_iter=5000) + G = glove.fit(X) + + print("\nLearned vectors:") + print(G) + + print("We expect the dot product of learned vectors " + "to be proportional to the co-occurrence counts. " + "Let's see how close we came:") + + corr = np.corrcoef(G.dot(G.T).ravel(), X.ravel())[0][1] + + print("Pearson's R: {} ".format(corr)) \ No newline at end of file From 91ca5c6510369c8f9c3b3d2bcbd443ed4e5d7c1c Mon Sep 17 00:00:00 2001 From: Andrei Damian Date: Thu, 9 Apr 2020 09:58:23 +0300 Subject: [PATCH 06/11] faster GPU implementation tested --- mittens/tf_mittens.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index 1a198ae..efd3a79 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -244,11 +244,11 @@ def _make_word_word_matrix(n=50): -# X = np.array([ -# [10.0, 2.0, 3.0, 4.0], -# [ 2.0, 10.0, 4.0, 1.0], -# [ 3.0, 4.0, 10.0, 2.0], -# [ 4.0, 1.0, 2.0, 10.0]]) +# X = np.array([ +# [10.0, 2.0, 3.0, 4.0], +# [ 2.0, 10.0, 4.0, 1.0], +# [ 3.0, 4.0, 10.0, 2.0], +# [ 4.0, 1.0, 2.0, 10.0]]) # X = _make_word_word_matrix(n=10000) glove = GloVe(n=128, max_iter=5000) From c06b07c0ad42100611361c1e294574fdc2647dc9 Mon Sep 17 00:00:00 2001 From: Andrei Damian Date: Thu, 9 Apr 2020 12:05:47 +0300 Subject: [PATCH 07/11] added both in-GPU and session feeding approach --- mittens/mittens_base.py | 6 ++ mittens/tf_mittens.py | 171 +++++++++++++++++++++++++++++++++------- 2 files changed, 150 insertions(+), 27 deletions(-) diff --git a/mittens/mittens_base.py b/mittens/mittens_base.py index 439ab16..c392eaa 100644 --- a/mittens/mittens_base.py +++ b/mittens/mittens_base.py @@ -31,6 +31,12 @@ def __init__(self, n=100, mittens=0.1, xmax=100, alpha=0.75, self.max_iter = max_iter self.errors = list() self.test_mode = test_mode + + def message(self, obj): + if type(obj) != str: + obj = str(obj) + print("\r" + obj, flush=True) + return def fit(self, X, diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index d862033..7201aac 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -39,10 +39,76 @@ class Mittens(MittensBase): __doc__ = MittensBase.__doc__.format( framework=_FRAMEWORK, second=_DESC.format(model=MittensBase._MODEL)) + + def __init__(self, + DEBUG=False, + no_feeds=True, + save_folder=None, + save_iters=500, + save_opt_hist=True, + **kwargs): + super().__init__(**kwargs) + self.DEBUG = DEBUG + self.save_iters = save_iters + self.save_opt_hist = save_opt_hist + self.no_feeds = no_feeds + self.message("Tensorflow ({}) Mittens initialized with {}".format( + tf.__version__, + 'full in-GPU training (no memory feeds)' if self.no_feeds else 'memory feeds' + )) + self.save_folder = '' + if save_folder is not None: + if not os.path.isdir(save_folder): + os.makedirs(save_folder) + if os.path.isdir(save_folder): + self.save_folder = save_folder + + self._last_saved_file = None + return @property def framework(self): return _FRAMEWORK + + + def save(self, filename): + fn = os.path.join(self.save_folder, filename) + embeds = self._get_embeds() + try: + np.save(fn, embeds) + self.message('') + self.message(" Embeddings file '{}' saved.".format(fn)) + res = fn + '.npy' + except: + res = None + return res + + + def _save_status(self, itr): + if self._last_saved_file is not None: + try: + os.remove(self._last_saved_file) + except: + self.message('') + self.message("Could not remove '{}'".format(self._last_saved_file)) + fn = 'embeds_itr_{}'.format(itr) + self._last_saved_file = self.save(fn) + + def _save_optimization_history(self, skip=5): + import matplotlib.pyplot as plt + plt.style.use('ggplot') + _ = plt.figure() + ax = plt.gca() + ax.plot(np.arange(skip, len(self.errors)), self.errors[skip:]) + ax.set_title('Mittens loss history (skipped first {} iters)'.format(skip)) + ax.set_xlabel('Iterations') + ax.set_ylabel('Loss') + ax.set_yscale('log') +# ax.set_xscale('log') + plt.savefig(os.path.join(self.save_folder, 'loss.png')) + plt.close() + + def _fit(self, X, weights, log_coincidence, vocab=None, @@ -52,7 +118,7 @@ def _fit(self, X, weights, log_coincidence, raise AttributeError("Tensorflow version of Mittens does " "not support specifying initializations.") - self.DEBUG = False + print() # Start the session: tf.reset_default_graph() @@ -62,7 +128,10 @@ def _fit(self, X, weights, log_coincidence, self._build_graph(vocab, initial_embedding_dict) # Optimizer set-up: - self.cost = self._get_cost_function(weights, log_coincidence) + if self.no_feeds: + self.cost = self._get_cost_function(weights, log_coincidence) + else: + self.cost = self._get_cost_function_with_placeholders() self.optimizer = self._get_train_func() # Set up logging for Tensorboard @@ -83,12 +152,17 @@ def _fit(self, X, weights, log_coincidence, merged_logs = tf.summary.merge_all() for i in range(1, self.max_iter+1): t1 = time() + if not self.no_feeds: + feed_dict = { + self.weights: weights, + self.log_coincidence: log_coincidence + } + else: + feed_dict = None + _, loss, stats = self.sess.run( [self.optimizer, self.cost, merged_logs], -# feed_dict={ -# self.weights: weights, -# self.log_coincidence: log_coincidence -# } + feed_dict=feed_dict ) # Keep track of losses @@ -105,6 +179,13 @@ def _fit(self, X, weights, log_coincidence, self._progressbar("loss: {}, time: {:.2f} s/itr".format( loss, t_elapsed), i) + + if (i % self.save_iters) == 0: + self._save_status(i) + if self.save_opt_hist: + self._save_optimization_history() + + # Return the sum of the two learned matrices, as recommended # in the paper: @@ -192,6 +273,31 @@ def _get_cost_function(self, weights, log_coincidence): tf.summary.scalar("cost", cost) return cost + def _get_cost_function_with_placeholders(self): + """Compute the cost of the Mittens objective function. + + If self.mittens = 0, this is the same as the cost of GloVe. + """ + self.weights = tf.placeholder( + tf.float32, shape=[self.n_words, self.n_words]) + self.log_coincidence = tf.placeholder( + tf.float32, shape=[self.n_words, self.n_words]) + + self.diffs = tf.subtract(self.model, self.log_coincidence) + cost = tf.reduce_sum( + 0.5 * tf.multiply(self.weights, tf.square(self.diffs))) + if self.mittens > 0: + self.mittens = tf.constant(self.mittens, tf.float32) + cost += self.mittens * tf.reduce_sum( + tf.multiply( + self.has_embedding, + self._tf_squared_euclidean( + tf.add(self.W, self.C), + self.original_embedding))) + tf.summary.scalar("cost", cost) + return cost + + @staticmethod def _tf_squared_euclidean(X, Y): """Squared Euclidean distance between the rows of `X` and `Y`. @@ -234,32 +340,43 @@ class GloVe(Mittens, GloVeBase): second=_DESC.format(model=GloVeBase._MODEL)) - """Returns a symmetric matrix where the entries are drawn from a + def _make_word_word_matrix(n=50): + """Returns a symmetric matrix where the entries are drawn from a Poisson distribution""" base = np.random.zipf(2, size=(n, n)) - 1 return base + base.T if __name__ == '__main__': + SIMPLE_TEST = False + USE_FULL_GPU = True + + if SIMPLE_TEST: + X = np.array([ + [10.0, 2.0, 3.0, 4.0], + [ 2.0, 10.0, 4.0, 1.0], + [ 3.0, 4.0, 10.0, 2.0], + [ 4.0, 1.0, 2.0, 10.0]]) + embed_size = 4 + else: + X = _make_word_word_matrix(10000) + embed_size = 128 + + glove = GloVe(n=embed_size, + save_folder='mittens_models', + save_iters=100, + max_iter=1000, + DEBUG=False, + no_feeds=USE_FULL_GPU) + G = glove.fit(X) + print("\nLearned vectors:") + print(G) + + print("We expect the dot product of learned vectors " + "to be proportional to the co-occurrence counts. " + "Let's see how close we came:") + + corr = np.corrcoef(G.dot(G.T).ravel(), X.ravel())[0][1] + print("Pearson's R: {} ".format(corr)) - -# X = np.array([ -# [10.0, 2.0, 3.0, 4.0], -# [ 2.0, 10.0, 4.0, 1.0], -# [ 3.0, 4.0, 10.0, 2.0], -# - X = _make_word_word_matrix(n=10000) - glove = GloVe(n=128, max_iter=5000) - G = glove.fit(X) - - print("\nLearned vectors:") - print(G) - - print("We expect the dot product of learned vectors " - "to be proportional to the co-occurrence counts. " - "Let's see how close we came:") - - corr = np.corrcoef(G.dot(G.T).ravel(), X.ravel())[0][1] - print("Pearson's R: {} ".format(corr)) - From e83ac6d88d7a5dc44f3691b4d36ca88816047bd8 Mon Sep 17 00:00:00 2001 From: Andrei Damian Date: Thu, 9 Apr 2020 15:34:29 +0300 Subject: [PATCH 08/11] added `__init__` for submodule usage (no package installation) --- __init__.py | 6 ++++++ mittens/__init__.py | 11 ++++++++++- mittens/mittens_base.py | 5 ++++- mittens/np_mittens.py | 14 ++++++++++++-- mittens/tf_mittens.py | 15 ++++++++++++--- 5 files changed, 44 insertions(+), 7 deletions(-) create mode 100644 __init__.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..a63a300 --- /dev/null +++ b/__init__.py @@ -0,0 +1,6 @@ +try: + from mittens.mittens.tf_mittens import Mittens, GloVe +except ImportError: + from mittens.mittens.np_mittens import Mittens, GloVe + +__version__ = "0.2.2" diff --git a/mittens/__init__.py b/mittens/__init__.py index b4d7b69..54617b4 100644 --- a/mittens/__init__.py +++ b/mittens/__init__.py @@ -1,6 +1,15 @@ try: + try: from mittens.tf_mittens import Mittens, GloVe + except: +# print("Failed mittens.tf_mittens") + from mittens.mittens.tf_mittens import Mittens, GloVe except ImportError: +# print("Failed ANY tf_mittens") + try: from mittens.np_mittens import Mittens, GloVe + except: +# print("Failed mittens.np_mittens") + from mittens.mittens.np_mittens import Mittens, GloVe -__version__ = "0.2" +__version__ = "0.2.2" diff --git a/mittens/mittens_base.py b/mittens/mittens_base.py index c392eaa..0ce169b 100644 --- a/mittens/mittens_base.py +++ b/mittens/mittens_base.py @@ -4,7 +4,10 @@ import numpy as np -from mittens.doc import BASE_DOC, MITTENS_PARAM_DESCRIPTION +try: + from mittens.doc import BASE_DOC, MITTENS_PARAM_DESCRIPTION +except: + from mittens.mittens.doc import BASE_DOC, MITTENS_PARAM_DESCRIPTION class MittensBase(object): diff --git a/mittens/np_mittens.py b/mittens/np_mittens.py index b21a3a1..174d8db 100644 --- a/mittens/np_mittens.py +++ b/mittens/np_mittens.py @@ -16,8 +16,13 @@ """ import numpy as np -from mittens.mittens_base import randmatrix, noise -from mittens.mittens_base import MittensBase, GloVeBase +try: + from mittens.mittens_base import randmatrix, noise + from mittens.mittens_base import MittensBase, GloVeBase +except: + from mittens.mittens.mittens_base import randmatrix, noise + from mittens.mittens.mittens_base import MittensBase, GloVeBase + _FRAMEWORK = "NumPy" @@ -35,6 +40,11 @@ class Mittens(MittensBase): framework=_FRAMEWORK, second=_DESC.format(model=MittensBase._MODEL)) + def __init__(self, + **kwargs): + super().__init__(**kwargs) + self.message("NumPy Mittens initialized.") + @property def framework(self): return _FRAMEWORK diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index 7201aac..c5dbe94 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -19,8 +19,12 @@ from time import time -from mittens.mittens_base import randmatrix, noise -from mittens.mittens_base import MittensBase, GloVeBase +try: + from mittens.mittens_base import randmatrix, noise + from mittens.mittens_base import MittensBase, GloVeBase +except: + from mittens.mittens.mittens_base import randmatrix, noise + from mittens.mittens.mittens_base import MittensBase, GloVeBase @@ -121,6 +125,10 @@ def _fit(self, X, weights, log_coincidence, print() # Start the session: + if hasattr(self, 'sess'): + self.sess.close() + self.sess = None + run_config = tf.RunOptions(report_tensor_allocations_upon_oom = True) tf.reset_default_graph() self.sess = tf.InteractiveSession() @@ -162,7 +170,8 @@ def _fit(self, X, weights, log_coincidence, _, loss, stats = self.sess.run( [self.optimizer, self.cost, merged_logs], - feed_dict=feed_dict + feed_dict=feed_dict, + run_config=run_config, ) # Keep track of losses From 74fb033a04dc958cadefe52909456d1bbd57af79 Mon Sep 17 00:00:00 2001 From: Andrei Damian Date: Fri, 10 Apr 2020 08:00:49 +0300 Subject: [PATCH 09/11] various additions (verbosing, etc) --- mittens/tf_mittens.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index c5dbe94..3d21445 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -27,6 +27,9 @@ from mittens.mittens.mittens_base import MittensBase, GloVeBase +from collections import deque + +__VER__ = '0.2.1.6' _FRAMEWORK = "TensorFlow" @@ -50,14 +53,17 @@ def __init__(self, save_folder=None, save_iters=500, save_opt_hist=True, + name='mittenstf', **kwargs): super().__init__(**kwargs) self.DEBUG = DEBUG + self.name = name self.save_iters = save_iters self.save_opt_hist = save_opt_hist self.no_feeds = no_feeds - self.message("Tensorflow ({}) Mittens initialized with {}".format( + self.message("Tensorflow ({}) Mittens v{} initialized with {}".format( tf.__version__, + __VER__, 'full in-GPU training (no memory feeds)' if self.no_feeds else 'memory feeds' )) self.save_folder = '' @@ -66,6 +72,13 @@ def __init__(self, os.makedirs(save_folder) if os.path.isdir(save_folder): self.save_folder = save_folder + self.message(" Saving in '{}' folder.".format(self.save_folder)) + else: + self.message(' No folder provided. Saving in current folder.') + + self.message(" Generating d={} embeddings for {} items".format( + self.n, + self.n_words)) self._last_saved_file = None return @@ -95,7 +108,7 @@ def _save_status(self, itr): except: self.message('') self.message("Could not remove '{}'".format(self._last_saved_file)) - fn = 'embeds_itr_{}'.format(itr) + fn = '{}_i{}k'.format(self.name, int(itr / 1000)) self._last_saved_file = self.save(fn) def _save_optimization_history(self, skip=5): @@ -109,7 +122,7 @@ def _save_optimization_history(self, skip=5): ax.set_ylabel('Loss') ax.set_yscale('log') # ax.set_xscale('log') - plt.savefig(os.path.join(self.save_folder, 'loss.png')) + plt.savefig(os.path.join(self.save_folder, '{}_loss.png'.format(self.name))) plt.close() @@ -158,6 +171,8 @@ def _fit(self, X, weights, log_coincidence, self.bc_start = self.sess.run(self.bc) merged_logs = tf.summary.merge_all() + t0 = time() + self._last_timings = deque(maxlen=1000) for i in range(1, self.max_iter+1): t1 = time() if not self.no_feeds: @@ -171,7 +186,7 @@ def _fit(self, X, weights, log_coincidence, _, loss, stats = self.sess.run( [self.optimizer, self.cost, merged_logs], feed_dict=feed_dict, - run_config=run_config, + options=run_config, ) # Keep track of losses @@ -179,15 +194,23 @@ def _fit(self, X, weights, log_coincidence, log_writer.add_summary(stats) self.errors.append(loss) t2 = time() - t_elapsed = t2 - t1 + t_l = t2 - t1 + self._last_timings.append(t_l) + t_lap = np.mean(self._last_timings) + t_elapsed = t2 - t0 + t_total = t_lap * self.max_iter + t_remain = t_total - t_elapsed if loss < self.tol: # Quit early if tolerance is met self._progressbar("stopping with loss < self.tol", i) break else: - self._progressbar("loss: {}, time: {:.2f} s/itr".format( + self._progressbar("loss: {}, time: {:.2f} s/itr, remain: {:.2f} hrs (elapsed: {:.2f} hrs out of total {:.2f} hrs)".format( loss, - t_elapsed), i) + t_lap, + t_remain / 3600, + t_elapsed / 3600, + t_total / 3600), i) if (i % self.save_iters) == 0: self._save_status(i) From 6aa0a8617808b07b17871e70e8a7cf7558542e11 Mon Sep 17 00:00:00 2001 From: Andrei Damian Date: Wed, 15 Apr 2020 12:19:16 +0300 Subject: [PATCH 10/11] update save name --- mittens/tf_mittens.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index 3d21445..6a025ba 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -218,9 +218,10 @@ def _fit(self, X, weights, log_coincidence, self._save_optimization_history() - + #endfor iters # Return the sum of the two learned matrices, as recommended # in the paper: + self.save(self.name+'_embeds') return self._get_embeds() From 7c4a5be14da6816c7ca904488983942afa054ff1 Mon Sep 17 00:00:00 2001 From: Andrei Damian Date: Wed, 22 Apr 2020 10:34:02 +0300 Subject: [PATCH 11/11] more verbosity --- mittens/mittens_base.py | 18 +++++++++++++++--- mittens/tf_mittens.py | 8 ++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/mittens/mittens_base.py b/mittens/mittens_base.py index 0ce169b..3da9bbb 100644 --- a/mittens/mittens_base.py +++ b/mittens/mittens_base.py @@ -1,6 +1,7 @@ from copy import copy import random import sys +from time import time import numpy as np @@ -35,10 +36,17 @@ def __init__(self, n=100, mittens=0.1, xmax=100, alpha=0.75, self.errors = list() self.test_mode = test_mode - def message(self, obj): + def message(self, obj, timer=None): if type(obj) != str: - obj = str(obj) - print("\r" + obj, flush=True) + obj = str(obj) + elapsed = 0 + if timer == 'start': + self._msg_time = time() + elif timer == 'stop': + elapsed = time() - self._msg_time + if elapsed > 0: + obj = obj + ' ({:.1f}s)'.format(elapsed) + print("\r" + obj, flush=True) return def fit(self, @@ -78,14 +86,18 @@ def fit(self, embedding of the corresponding element in `vocab`. """ + self.message("Fitting mco {}".format(X.shape)) + if fixed_initialization is not None: assert self.test_mode, \ "Fixed initialization parameters can only be provided" \ " in test mode. Initialize {} with `test_mode=True`.". \ format(self.__class__.split(".")[-1]) + self.message(" Dimensions check") self._check_dimensions( X, vocab, initial_embedding_dict ) + self.message(" Initializing weights and log(mco)") weights, log_coincidence = self._initialize(X) return self._fit(X, weights, log_coincidence, vocab=vocab, diff --git a/mittens/tf_mittens.py b/mittens/tf_mittens.py index 6a025ba..c513331 100644 --- a/mittens/tf_mittens.py +++ b/mittens/tf_mittens.py @@ -135,7 +135,7 @@ def _fit(self, X, weights, log_coincidence, raise AttributeError("Tensorflow version of Mittens does " "not support specifying initializations.") - print() + self.message("Preparing graph & session:", timer='start') # Start the session: if hasattr(self, 'sess'): @@ -146,9 +146,11 @@ def _fit(self, X, weights, log_coincidence, self.sess = tf.InteractiveSession() # Build the computation graph. + self.message(" Building graph") self._build_graph(vocab, initial_embedding_dict) # Optimizer set-up: + self.message(" Preparing cost/train function") if self.no_feeds: self.cost = self._get_cost_function(weights, log_coincidence) else: @@ -163,6 +165,7 @@ def _fit(self, X, weights, log_coincidence, log_writer = tf.summary.FileWriter(directory, flush_secs=1) # Run training + self.message(" Initializing variables") self.sess.run(tf.global_variables_initializer()) if self.test_mode: self.W_start = self.sess.run(self.W) @@ -173,6 +176,7 @@ def _fit(self, X, weights, log_coincidence, merged_logs = tf.summary.merge_all() t0 = time() self._last_timings = deque(maxlen=1000) + self.message("Done preparation session", timer='stop') for i in range(1, self.max_iter+1): t1 = time() if not self.no_feeds: @@ -392,7 +396,7 @@ def _make_word_word_matrix(n=50): [ 4.0, 1.0, 2.0, 10.0]]) embed_size = 4 else: - X = _make_word_word_matrix(10000) + X = _make_word_word_matrix(13000) embed_size = 128 glove = GloVe(n=embed_size,