Changes from all commits (83 commits)
69760b7
make the code executable
lostsartre Apr 16, 2018
a7b12e9
small changes
Apr 22, 2018
632be84
Create new_implement directory and create Data.py to generate train a…
Dynsk Apr 25, 2018
28471f7
add a network file
lostsartre Apr 25, 2018
df75835
Merge branch 'master' of https://github.com/lostsartre/CF-NADE
lostsartre Apr 25, 2018
f4b2227
Create Dimension.txt
Dynsk Apr 25, 2018
fc1142a
Update Dimension.txt
Dynsk Apr 25, 2018
db5ac42
new-network
Apr 25, 2018
f0f43c3
edit re-network
lostsartre Apr 25, 2018
db213d6
Modify Data.py output format
Dynsk Apr 25, 2018
777d5b2
add CF-NADE.py, main.py
lostsartre May 2, 2018
dac5888
fix bugs
lostsartre May 2, 2018
f42bc6f
...
May 2, 2018
5ecbd57
Merge branch 'lostsartre' of https://github.com/lostsartre/CF-NADE in…
May 2, 2018
d2427ed
dataLoader
lostsartre May 2, 2018
5c84acc
change data
May 2, 2018
429c51d
solve conflct
May 2, 2018
2b0eef5
Data.py
May 2, 2018
6ef3015
Fixed the problem when begin a new epoch, you need to start reading t…
Dynsk May 3, 2018
cfc462f
change Data.py
lostsartre May 4, 2018
8ad8b4a
merge Data.py
lostsartre May 4, 2018
c702f12
Merge branch 'master' into lostsartre
lostsartre May 4, 2018
317ac66
Merge pull request #1 from lostsartre/lostsartre
lostsartre May 4, 2018
0761449
fix bag
May 4, 2018
92895e4
Merge pull request #2 from lostsartre/lostsartre
lostsartre May 4, 2018
55f8b7e
Update README
Dynsk May 4, 2018
e3a9e9e
Update README
Dynsk May 4, 2018
3f47d99
update
May 4, 2018
e32f8a2
Merge pull request #3 from lostsartre/lostsartre
lostsartre May 4, 2018
b268ad2
shared
May 4, 2018
e714e17
Tab-Space
May 4, 2018
87bc844
Update README
Dynsk May 4, 2018
2658dce
change loss
May 4, 2018
d61cdd9
Cumultive
May 4, 2018
17ae3bd
mm
May 4, 2018
78e4644
Merge branch 'master' of https://github.com/lostsartre/CF-NADE
May 4, 2018
d475d39
CF_NADE
May 4, 2018
25115b4
fix bugs
May 4, 2018
1ac876d
Regular
May 4, 2018
36838d5
regularization
May 4, 2018
a83ed1f
regularization
May 4, 2018
7c3a561
Data.py: create get_batch_train and get_batch_test
Dynsk May 4, 2018
3257637
test evaluation
May 4, 2018
5b2595a
233
May 5, 2018
5fb6d1c
New_Hope
May 5, 2018
c78bc0c
full-version
May 5, 2018
e6cb2a1
full version
May 5, 2018
8cef47d
old_implement
May 5, 2018
4c995ce
mm
May 5, 2018
404f4e0
merge
May 5, 2018
aeb02a1
CF_NADE_V2
May 5, 2018
1e4d607
debug Data_user_per_sample.py
Dynsk May 5, 2018
b3e0a82
Data_user_per_sample.py: support shuffle when myData.renew_train()
Dynsk May 6, 2018
c29f1ae
epoch
May 6, 2018
837cc48
Data_user_per_sample.py: support shuffle when myData.renew_train()
Dynsk May 6, 2018
1ba665e
Merge remote-tracking branch 'origin/master'
Dynsk May 6, 2018
b63b890
epoch
May 6, 2018
4298991
Data_user_per_sample.py: roll back to no shuffle
Dynsk May 6, 2018
4fdc62b
add CF_NADE.py
Dynsk May 6, 2018
e369844
Data_user_per_sample.py: add shuffle again, original slow implement
Dynsk May 6, 2018
ed934e1
Data_user_per_sample.py: now split data into train, dev, test
Dynsk May 6, 2018
bb96238
Data_user_per_sample.py: add documents
Dynsk May 6, 2018
f3cc94c
Data_user_per_sample.py: change prepare_data.py
Dynsk May 6, 2018
1ae91d2
merge
May 6, 2018
75b4cef
Merge branch 'master' of https://github.com/lostsartre/CF-NADE
May 6, 2018
44c71f7
add dev
May 6, 2018
c4f08de
Data_timestamp.py: created
Dynsk May 7, 2018
2f47f32
Merge remote-tracking branch 'origin/master'
Dynsk May 7, 2018
5c71204
log
May 7, 2018
e667c6b
two weight regularization
May 7, 2018
eee15af
Matrix Version
May 7, 2018
7bc8307
merge
May 7, 2018
5ad32ec
Merge branch 'master' of https://github.com/lostsartre/CF-NADE
May 7, 2018
20aa913
CF_NADE_LSTM.py
May 7, 2018
27e8dc8
Data_lstm.py: created
Dynsk May 7, 2018
58a6a68
time_stamp_version
May 7, 2018
838f1fd
aa
May 7, 2018
bf71db0
time stamp
May 7, 2018
9475c04
Data_lstm.py: debug triple2vecor()
Dynsk May 7, 2018
b90bc0a
Data_lstm.py: complete
Dynsk May 7, 2018
d4037bc
..
May 7, 2018
e22116c
running
May 8, 2018
c6818f2
wrapped
May 8, 2018
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
ml_datasets/
659 changes: 659 additions & 0 deletions .idea/workspace.xml

Large diffs are not rendered by default.

127 changes: 127 additions & 0 deletions CF_NADE_alloutput.ipynb
@@ -0,0 +1,127 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"#Parameters\n",
"learning_rate = 0.001\n",
"\n",
"#Network Parameters\n",
"movie_num = 3883\n",
"score_num = 5 \n",
"batch_size = 512\n",
"hidden_num = 500"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"ratings = tf.placeholder(tf.float32, shape=[None, movie_num, score_num])\n",
"in_mask = tf.placeholder(tf.float32, shape=[None, movie_num])\n",
"out_mask = tf.placeholder(tf.float32, shape=[None, movie_num])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"def CF_NADE(ratings, in_mask, out_mask):\n",
" bias = {'l1':tf.Variable(tf.random_normal(shape=[hidden_num])),\n",
" 'l2':tf.Variable(tf.random_normal(shape=[movie_num, score_num]))}\n",
" weight = {'l1':tf.Variable(tf.random_normal(shape=[movie_num, score_num, hidden_num])),\n",
" 'l2':tf.Variable(tf.random_normal(shape=[hidden_num, movie_num, score_num]))}\n",
" # dim(h) = batch_size * hidden_num\n",
" h = tf.tanh(tf.add(bias['l1'], tf.tensordot(ratings\n",
" * in_mask[:,:,np.newaxis], weight['l1'], axes=[[1,2], [0,1]])))\n",
" #dim(output) = batch_size * movie_num * socre_num\n",
" output = tf.add(bias['l2'], tf.tensordot(h, weight['l2'], axes=[[1], [0]]) * out_mask[:,:,np.newaxis])\n",
" return output"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"output = CF_NADE(ratings, in_mask, out_mask)\n",
"#dim(scores_tensor) = batch_size * movie_num * score_num\n",
"scores_tensor = np.concatenate([np.ones((batch_size, movie_num, 1)) * i for i in range(1, 6)], axis=2)\n",
"#dim(batch_socres) = batch_size * movie_num\n",
"pred_scores = tf.reduce_sum(scores_tensor * tf.nn.softmax(output, axis=2), axis=2)\n",
"true_scores = tf.argmax((ratings * out_mask[:,:,np.newaxis]), axis=2) + 1\n",
"loss_op = tf.losses.mean_squared_error(true_scores, pred_scores)\n",
"optimizer = tf.train.AdamOptimizer()\n",
"train_op = optimizer.minimize(loss_op)\n",
"init = tf.global_variables_initializer()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'epoches' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-16-ae8108f335b0>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0msess\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0msess\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minit\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mepoches\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mbatch\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtrain_loop_stream\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_epoch_iterator\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# to do\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mratings\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0min_mask\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mout_mask\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'epoches' is not defined"
]
}
],
"source": [
"with tf.Session() as sess:\n",
" sess.run(init)\n",
" for epoch in range(epoches):\n",
" for batch in train_loop_stream.get_epoch_iterator(): # to do\n",
" ratings, in_mask, out_mask = batch\n",
" sess.run(train_op, feed_dict={ratings:ratings, in_mask:in_mask, out_mask:out_mask})\n",
" loss = sess.run(loss_op, feed_dict={ratings:ratings, in_mask:inmask, out_mask:outmask})\n",
" print ('epoch:', epoch, 'loss:', loss)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
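
The notebook above wires together two ideas: the hidden layer sees only the input-masked one-hot ratings, and each movie's prediction is read off a 5-way softmax as an expected rating, i.e. the sum over k of k * p(rating = k). Below is a minimal single-user NumPy sketch of both steps, with toy sizes and random weights; it is an illustration under those assumptions, not code from the repository:

    import numpy as np

    # Toy sizes: 4 movies, 5 rating levels, 3 hidden units.
    movie_num, score_num, hidden_num = 4, 5, 3
    rng = np.random.RandomState(0)

    # One user's one-hot ratings (movie_num x score_num); unrated rows stay all-zero.
    ratings = np.zeros((movie_num, score_num))
    ratings[0, 4] = 1.0   # movie 0 rated 5
    ratings[2, 1] = 1.0   # movie 2 rated 2
    in_mask = np.array([1.0, 0.0, 1.0, 0.0])   # ratings the model may condition on
    out_mask = np.array([0.0, 1.0, 0.0, 1.0])  # ratings it is asked to predict

    W1 = rng.randn(movie_num, score_num, hidden_num)
    b1 = rng.randn(hidden_num)
    W2 = rng.randn(hidden_num, movie_num, score_num)
    b2 = rng.randn(movie_num, score_num)

    # Hidden layer: only input-masked ratings contribute (contract movie and score axes).
    h = np.tanh(b1 + np.tensordot(ratings * in_mask[:, None], W1, axes=([0, 1], [0, 1])))

    # Logits per (movie, rating level), then a softmax over the 5 levels.
    logits = b2 + np.tensordot(h, W2, axes=([0], [0]))
    probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)

    # Expected rating per movie: sum_k k * p(rating = k), k in 1..5.
    pred_scores = (probs * np.arange(1, 6)).sum(axis=1)
    print(pred_scores[out_mask.astype(bool)])  # predictions for the held-out movies

The notebook's TensorFlow version is the batched equivalent: its tensordot contracts the same two axes with a leading batch dimension, which is why its axes arguments read [[1, 2], [0, 1]] rather than [[0, 1], [0, 1]].
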
74 changes: 72 additions & 2 deletions README
@@ -1,4 +1,74 @@
Code for "A Neural Autoregressive Approach to Collaborative Filtering", ICML 2016
//
// _oo0oo_
// o8888888o
// 88" . "88
// (| -_- |)
// 0\ = /0
// ___/`---'\___
// .' \\| |// '.
// / \\||| : |||// \
// / _||||| -:- |||||- \
// | | \\\ - /// | |
// | \_| ''\---/'' |_/ |
// \ .-\__ '-' ___/-. /
// ___'. .' /--.--\ `. .'___
// ."" '< `.___\_<|>_/___.' >' "".
// | | : `- \`.;`\ _ /`;.`/ - ` : | |
// \ \ `_. \_ __\ /__ _/ .-` / /
// =====`-.____`.___ \_____/___.-`___.-'=====
// `=---='
//
//
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
//          May the Buddha bless us: no bugs, ever
//
//

/* Code stays far away from bugs, with this animal protecting it
* ┏┓   ┏┓
*┏┛┻━━━┛┻┓
*┃       ┃  
*┃   ━   ┃
*┃ ┳┛ ┗┳ ┃
*┃       ┃
*┃   ┻   ┃
*┃       ┃
*┗━┓   ┏━┛
*  ┃   ┃  The divine beast blesses:
*  ┃   ┃  code with no bugs!
*  ┃   ┗━━━┓
*  ┃       ┣┓
*  ┃       ┏┛
*  ┗┓┓┏━┳┓┏┛
*   ┃┫┫ ┃┫┫
*   ┗┻┛ ┗┻┛
*   
*/


"""
Heaven hear us, earth hear us: we call on the ancestral masters to appear.

Masters, I bow before you; hear this humble disciple's plea.

First we invite Leibniz of the binary; second, Babbage, who built the first machine.

Third, Ada, who wrote the algorithms; fourth, von Neumann of the new architecture.

Fifth, the brilliant Alan Turing; sixth, Ritchie and Thompson.

Seventh, the founding fathers of the Internet; eighth, Gates, may fortune pour in.

Ninth, Richard, who championed open source; tenth, Linus, who keeps the updates coming.

And last we invite BAT: bless this code, make it shine like gold!
"""


New implementation of the code for "A Neural Autoregressive Approach to Collaborative Filtering", ICML 2016.

See /new_implement.

Dependencies:

@@ -14,4 +84,4 @@ test CF-NADE.

An example to run the code is:

MovieLens1M=/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-0 python learner_ordinalcost_directly_itembased_newsoftmax_timing.py 512 10 60 0.001 0.1 0.001 1e-8 500 tanh 0 0.02 Adam 0 1 0.995 /tmp/cfnade
MovieLens1M=/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-0 python learner_ordinalcost_directly_itembased_newsoftmax_timing.py 512 10 60 0.001 0.1 0.001 1e-8 500 tanh 0 0.02 Adam 0 1 0.995 /tmp/cfnade
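
As the command shows, the trainer takes its dataset directory from the MovieLens1M environment variable and everything else positionally. A minimal Python sketch of consuming that convention follows; the environment-variable handling is inferred from the command line above, and the meanings of the positional arguments are not documented here:

    import os
    import sys

    # The dataset directory is passed via the MovieLens1M environment variable.
    data_dir = os.environ.get('MovieLens1M')
    if data_dir is None:
        sys.exit('Set MovieLens1M to a prepared dataset directory first.')

    # All hyperparameters arrive positionally, exactly as in the example command.
    hyperparams = sys.argv[1:]
    print('dataset:', data_dir)
    print('positional hyperparameters:', hyperparams)
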
40 changes: 20 additions & 20 deletions datasets/movielens_1m_shuffle_itermbased.py
@@ -43,10 +43,10 @@ def write_movie_data(ratings, data_path, output, seed):
cnt_u = 0
cnt_i = 0
for user_id, mov_id, rating, _ in ratings:
if user_id not in users.keys():
if user_id not in list(users.keys()):
users[user_id] = cnt_u
cnt_u += 1
if mov_id not in movs.keys():
if mov_id not in list(movs.keys()):
movs[mov_id] = cnt_i
cnt_i += 1
n_users = len(users)
@@ -163,12 +163,12 @@ def write_movie_data(ratings, data_path, output, seed):
f.close()

f = open(os.path.join(output, 'user_dict'), 'wb')
import cPickle
cPickle.dump(users, f)
import pickle
pickle.dump(users, f)
f.close()

f = open(os.path.join(output, 'movie_dict'), 'wb')
cPickle.dump(movs, f)
pickle.dump(movs, f)
f.close()


@@ -183,28 +183,28 @@ def main(data_path, output, seed):
write_movie_data(ratings, data_path, output, seed)

if __name__ == "__main__":
# main("/Users/yin.zheng/Downloads/ml-1m",
# "/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-0",
# main("./../ml-1m",
# "./../ml_datasets/MovieLens1M-shuffle-itembased-0",
# 1234)
print '1'
main("/Users/yin.zheng/Downloads/ml-1m",
"/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-1",
print('1')
main("./../ml-1m",
"./../ml_datasets/MovieLens1M-shuffle-itembased-1",
2341)
print '2'
main("/Users/yin.zheng/Downloads/ml-1m",
"/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-2",
print('2')
main("./../ml-1m",
"./../ml_datasets/MovieLens1M-shuffle-itembased-2",
3412)
print '3'
main("/Users/yin.zheng/Downloads/ml-1m",
"/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-3",
print('3')
main("./../ml-1m",
"./../ml_datasets/MovieLens1M-shuffle-itembased-3",
4123)
print '4'
main("/Users/yin.zheng/Downloads/ml-1m",
"/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-4",
print('4')
main("./../ml-1m",
"./../ml_datasets/MovieLens1M-shuffle-itembased-4",
1324)
# from fuel.datasets import H5PYDataset
#
# trainset = H5PYDataset(os.path.join('/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased', 'movielens-1m.hdf5'),
# trainset = H5PYDataset(os.path.join('./../ml_datasets/MovieLens1M-shuffle-itembased', 'movielens-1m.hdf5'),
# which_sets = ('train',),
# load_in_memory=True,
# sources=('input_ratings', 'output_ratings', 'input_masks', 'output_masks')
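
The hunks above amount to a Python 2 to 3 port (cPickle to pickle, print statements to print calls) plus a switch to relative dataset paths. For context, here is a standalone sketch of the id-remapping pattern write_movie_data relies on, assuming (user_id, movie_id, rating) triples as in the script; the sample triples are made up:

    import pickle

    # Raw MovieLens ids are sparse; map each to a contiguous 0-based index on first sight.
    ratings = [(7, 1193, 5), (7, 661, 3), (42, 1193, 4)]  # hypothetical (user_id, movie_id, rating)

    users, movs = {}, {}
    for user_id, mov_id, _rating in ratings:
        if user_id not in users:
            users[user_id] = len(users)
        if mov_id not in movs:
            movs[mov_id] = len(movs)

    print(len(users), 'users,', len(movs), 'movies')

    # Persist the lookup tables the way the script does, as binary pickles.
    with open('user_dict', 'wb') as f:
        pickle.dump(users, f)
    with open('movie_dict', 'wb') as f:
        pickle.dump(movs, f)

Note that membership tests work directly on a dict in Python 3, so "user_id not in users" is enough; the list(users.keys()) wrapper in the diff is correct but builds a throwaway list on every lookup.
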