Changes from all commits (83 commits)
69760b7
make the code executable
lostsartre Apr 16, 2018
a7b12e9
small changes
Apr 22, 2018
632be84
Create new_implement directory and create Data.py to generate train a…
Dynsk Apr 25, 2018
28471f7
add a network file
lostsartre Apr 25, 2018
df75835
Merge branch 'master' of https://github.com/lostsartre/CF-NADE
lostsartre Apr 25, 2018
f4b2227
Create Dimension.txt
Dynsk Apr 25, 2018
fc1142a
Update Dimension.txt
Dynsk Apr 25, 2018
db5ac42
new-network
Apr 25, 2018
f0f43c3
edit re-network
lostsartre Apr 25, 2018
db213d6
Modify Data.py output format
Dynsk Apr 25, 2018
777d5b2
add CF-NADE.py, main.py
lostsartre May 2, 2018
dac5888
fix bugs
lostsartre May 2, 2018
f42bc6f
...
May 2, 2018
5ecbd57
Merge branch 'lostsartre' of https://github.com/lostsartre/CF-NADE in…
May 2, 2018
d2427ed
dataLoader
lostsartre May 2, 2018
5c84acc
change data
May 2, 2018
429c51d
solve conflct
May 2, 2018
2b0eef5
Data.py
May 2, 2018
6ef3015
Fixed the problem when begin a new epoch, you need to start reading t…
Dynsk May 3, 2018
cfc462f
change Data.py
lostsartre May 4, 2018
8ad8b4a
merge Data.py
lostsartre May 4, 2018
c702f12
Merge branch 'master' into lostsartre
lostsartre May 4, 2018
317ac66
Merge pull request #1 from lostsartre/lostsartre
lostsartre May 4, 2018
0761449
fix bag
May 4, 2018
92895e4
Merge pull request #2 from lostsartre/lostsartre
lostsartre May 4, 2018
55f8b7e
Update README
Dynsk May 4, 2018
e3a9e9e
Update README
Dynsk May 4, 2018
3f47d99
update
May 4, 2018
e32f8a2
Merge pull request #3 from lostsartre/lostsartre
lostsartre May 4, 2018
b268ad2
shared
May 4, 2018
e714e17
Tab-Space
May 4, 2018
87bc844
Update README
Dynsk May 4, 2018
2658dce
change loss
May 4, 2018
d61cdd9
Cumultive
May 4, 2018
17ae3bd
mm
May 4, 2018
78e4644
Merge branch 'master' of https://github.com/lostsartre/CF-NADE
May 4, 2018
d475d39
CF_NADE
May 4, 2018
25115b4
fix bugs
May 4, 2018
1ac876d
Regular
May 4, 2018
36838d5
regularization
May 4, 2018
a83ed1f
regularization
May 4, 2018
7c3a561
Data.py: create get_batch_train and get_batch_test
Dynsk May 4, 2018
3257637
test evaluation
May 4, 2018
5b2595a
233
May 5, 2018
5fb6d1c
New_Hope
May 5, 2018
c78bc0c
full-version
May 5, 2018
e6cb2a1
full version
May 5, 2018
8cef47d
old_implement
May 5, 2018
4c995ce
mm
May 5, 2018
404f4e0
merge
May 5, 2018
aeb02a1
CF_NADE_V2
May 5, 2018
1e4d607
debug Data_user_per_sample.py
Dynsk May 5, 2018
b3e0a82
Data_user_per_sample.py: support shuffle when myData.renew_train()
Dynsk May 6, 2018
c29f1ae
epoch
May 6, 2018
837cc48
Data_user_per_sample.py: support shuffle when myData.renew_train()
Dynsk May 6, 2018
1ba665e
Merge remote-tracking branch 'origin/master'
Dynsk May 6, 2018
b63b890
epoch
May 6, 2018
4298991
Data_user_per_sample.py: roll back to no shuffle
Dynsk May 6, 2018
4fdc62b
add CF_NADE.py
Dynsk May 6, 2018
e369844
Data_user_per_sample.py: add shuffle again, original slow implement
Dynsk May 6, 2018
ed934e1
Data_user_per_sample.py: now split data into train, dev, test
Dynsk May 6, 2018
bb96238
Data_user_per_sample.py: add documents
Dynsk May 6, 2018
f3cc94c
Data_user_per_sample.py: change prepare_data.py
Dynsk May 6, 2018
1ae91d2
merge
May 6, 2018
75b4cef
Merge branch 'master' of https://github.com/lostsartre/CF-NADE
May 6, 2018
44c71f7
add dev
May 6, 2018
c4f08de
Data_timestamp.py: created
Dynsk May 7, 2018
2f47f32
Merge remote-tracking branch 'origin/master'
Dynsk May 7, 2018
5c71204
log
May 7, 2018
e667c6b
two weight regularization
May 7, 2018
eee15af
Matrix Version
May 7, 2018
7bc8307
merge
May 7, 2018
5ad32ec
Merge branch 'master' of https://github.com/lostsartre/CF-NADE
May 7, 2018
20aa913
CF_NADE_LSTM.py
May 7, 2018
27e8dc8
Data_lstm.py: created
Dynsk May 7, 2018
58a6a68
time_stamp_version
May 7, 2018
838f1fd
aa
May 7, 2018
bf71db0
time stamp
May 7, 2018
9475c04
Data_lstm.py: debug triple2vecor()
Dynsk May 7, 2018
b90bc0a
Data_lstm.py: complete
Dynsk May 7, 2018
d4037bc
..
May 7, 2018
e22116c
running
May 8, 2018
c6818f2
wrapped
May 8, 2018
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
ml_datasets/
659 changes: 659 additions & 0 deletions .idea/workspace.xml

Large diffs are not rendered by default.

127 changes: 127 additions & 0 deletions CF_NADE_alloutput.ipynb
@@ -0,0 +1,127 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"#Parameters\n",
"learning_rate = 0.001\n",
"\n",
"#Network Parameters\n",
"movie_num = 3883\n",
"score_num = 5 \n",
"batch_size = 512\n",
"hidden_num = 500"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"ratings = tf.placeholder(tf.float32, shape=[None, movie_num, score_num])\n",
"in_mask = tf.placeholder(tf.float32, shape=[None, movie_num])\n",
"out_mask = tf.placeholder(tf.float32, shape=[None, movie_num])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"def CF_NADE(ratings, in_mask, out_mask):\n",
" bias = {'l1':tf.Variable(tf.random_normal(shape=[hidden_num])),\n",
" 'l2':tf.Variable(tf.random_normal(shape=[movie_num, score_num]))}\n",
" weight = {'l1':tf.Variable(tf.random_normal(shape=[movie_num, score_num, hidden_num])),\n",
" 'l2':tf.Variable(tf.random_normal(shape=[hidden_num, movie_num, score_num]))}\n",
" # dim(h) = batch_size * hidden_num\n",
" h = tf.tanh(tf.add(bias['l1'], tf.tensordot(ratings\n",
" * in_mask[:,:,np.newaxis], weight['l1'], axes=[[1,2], [0,1]])))\n",
" #dim(output) = batch_size * movie_num * socre_num\n",
" output = tf.add(bias['l2'], tf.tensordot(h, weight['l2'], axes=[[1], [0]]) * out_mask[:,:,np.newaxis])\n",
" return output"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"output = CF_NADE(ratings, in_mask, out_mask)\n",
"#dim(scores_tensor) = batch_size * movie_num * score_num\n",
"scores_tensor = np.concatenate([np.ones((batch_size, movie_num, 1)) * i for i in range(1, 6)], axis=2)\n",
"#dim(batch_socres) = batch_size * movie_num\n",
"pred_scores = tf.reduce_sum(scores_tensor * tf.nn.softmax(output, axis=2), axis=2)\n",
"true_scores = tf.argmax((ratings * out_mask[:,:,np.newaxis]), axis=2) + 1\n",
"loss_op = tf.losses.mean_squared_error(true_scores, pred_scores)\n",
"optimizer = tf.train.AdamOptimizer()\n",
"train_op = optimizer.minimize(loss_op)\n",
"init = tf.global_variables_initializer()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'epoches' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-16-ae8108f335b0>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0msess\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0msess\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minit\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mepoches\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mbatch\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtrain_loop_stream\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_epoch_iterator\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# to do\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mratings\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0min_mask\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mout_mask\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'epoches' is not defined"
]
}
],
"source": [
"with tf.Session() as sess:\n",
" sess.run(init)\n",
" for epoch in range(epoches):\n",
" for batch in train_loop_stream.get_epoch_iterator(): # to do\n",
" ratings, in_mask, out_mask = batch\n",
" sess.run(train_op, feed_dict={ratings:ratings, in_mask:in_mask, out_mask:out_mask})\n",
" loss = sess.run(loss_op, feed_dict={ratings:ratings, in_mask:inmask, out_mask:outmask})\n",
" print ('epoch:', epoch, 'loss:', loss)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
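
The notebook above wires together two ideas: the hidden layer sees only the input-masked one-hot ratings, and each movie's prediction is read off a 5-way softmax as an expected rating, i.e. the sum over k of k * p(rating = k). Below is a minimal single-user NumPy sketch of both steps, with toy sizes and random weights; it is an illustration under those assumptions, not code from the repository:

    import numpy as np

    # Toy sizes: 4 movies, 5 rating levels, 3 hidden units.
    movie_num, score_num, hidden_num = 4, 5, 3
    rng = np.random.RandomState(0)

    # One user's one-hot ratings (movie_num x score_num); unrated rows stay all-zero.
    ratings = np.zeros((movie_num, score_num))
    ratings[0, 4] = 1.0   # movie 0 rated 5
    ratings[2, 1] = 1.0   # movie 2 rated 2
    in_mask = np.array([1.0, 0.0, 1.0, 0.0])   # ratings the model may condition on
    out_mask = np.array([0.0, 1.0, 0.0, 1.0])  # ratings it is asked to predict

    W1 = rng.randn(movie_num, score_num, hidden_num)
    b1 = rng.randn(hidden_num)
    W2 = rng.randn(hidden_num, movie_num, score_num)
    b2 = rng.randn(movie_num, score_num)

    # Hidden layer: only input-masked ratings contribute (contract movie and score axes).
    h = np.tanh(b1 + np.tensordot(ratings * in_mask[:, None], W1, axes=([0, 1], [0, 1])))

    # Logits per (movie, rating level), then a softmax over the 5 levels.
    logits = b2 + np.tensordot(h, W2, axes=([0], [0]))
    probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)

    # Expected rating per movie: sum_k k * p(rating = k), k in 1..5.
    pred_scores = (probs * np.arange(1, 6)).sum(axis=1)
    print(pred_scores[out_mask.astype(bool)])  # predictions for the held-out movies

The notebook's TensorFlow version is the batched equivalent: its tensordot contracts the same two axes with a leading batch dimension, which is why its axes arguments read [[1, 2], [0, 1]] rather than [[0, 1], [0, 1]].
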
74 changes: 72 additions & 2 deletions README
@@ -1,4 +1,74 @@
Code for "A Neural Autoregressive Approach to Collaborative Filtering", ICML 2016
//
// _oo0oo_
// o8888888o
// 88" . "88
// (| -_- |)
// 0\ = /0
// ___/`---'\___
// .' \\| |// '.
// / \\||| : |||// \
// / _||||| -:- |||||- \
// | | \\\ - /// | |
// | \_| ''\---/'' |_/ |
// \ .-\__ '-' ___/-. /
// ___'. .' /--.--\ `. .'___
// ."" '< `.___\_<|>_/___.' >' "".
// | | : `- \`.;`\ _ /`;.`/ - ` : | |
// \ \ `_. \_ __\ /__ _/ .-` / /
// =====`-.____`.___ \_____/___.-`___.-'=====
// `=---='
//
//
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
//          May the Buddha bless us: no bugs, ever
//
//

/* Code stays far away from bugs, with this animal protecting it
* ┏┓   ┏┓
*┏┛┻━━━┛┻┓
*┃       ┃  
*┃   ━   ┃
*┃ ┳┛ ┗┳ ┃
*┃       ┃
*┃   ┻   ┃
*┃       ┃
*┗━┓   ┏━┛
*  ┃   ┃  The divine beast blesses:
*  ┃   ┃  code with no bugs!
*  ┃   ┗━━━┓
*  ┃       ┣┓
*  ┃       ┏┛
*  ┗┓┓┏━┳┓┏┛
*   ┃┫┫ ┃┫┫
*   ┗┻┛ ┗┻┛
*   
*/


"""
Heaven hear us, earth hear us: we call on the ancestral masters to appear.

Masters, I bow before you; hear this humble disciple's plea.

First we invite Leibniz of the binary; second, Babbage, who built the first machine.

Third, Ada, who wrote the algorithms; fourth, von Neumann of the new architecture.

Fifth, the brilliant Alan Turing; sixth, Ritchie and Thompson.

Seventh, the founding fathers of the Internet; eighth, Gates, may fortune pour in.

Ninth, Richard, who championed open source; tenth, Linus, who keeps the updates coming.

And last we invite BAT: bless this code, make it shine like gold!
"""


New implementation of the code for "A Neural Autoregressive Approach to Collaborative Filtering", ICML 2016.

See /new_implement.

Dependencies:

@@ -14,4 +84,4 @@ test CF-NADE.

An example to run the code is:

MovieLens1M=/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-0 python learner_ordinalcost_directly_itembased_newsoftmax_timing.py 512 10 60 0.001 0.1 0.001 1e-8 500 tanh 0 0.02 Adam 0 1 0.995 /tmp/cfnade
MovieLens1M=/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-0 python learner_ordinalcost_directly_itembased_newsoftmax_timing.py 512 10 60 0.001 0.1 0.001 1e-8 500 tanh 0 0.02 Adam 0 1 0.995 /tmp/cfnade
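
As the command shows, the trainer takes its dataset directory from the MovieLens1M environment variable and everything else positionally. A minimal Python sketch of consuming that convention follows; the environment-variable handling is inferred from the command line above, and the meanings of the positional arguments are not documented here:

    import os
    import sys

    # The dataset directory is passed via the MovieLens1M environment variable.
    data_dir = os.environ.get('MovieLens1M')
    if data_dir is None:
        sys.exit('Set MovieLens1M to a prepared dataset directory first.')

    # All hyperparameters arrive positionally, exactly as in the example command.
    hyperparams = sys.argv[1:]
    print('dataset:', data_dir)
    print('positional hyperparameters:', hyperparams)
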
40 changes: 20 additions & 20 deletions datasets/movielens_1m_shuffle_itermbased.py
@@ -43,10 +43,10 @@ def write_movie_data(ratings, data_path, output, seed):
cnt_u = 0
cnt_i = 0
for user_id, mov_id, rating, _ in ratings:
if user_id not in users.keys():
if user_id not in list(users.keys()):
users[user_id] = cnt_u
cnt_u += 1
if mov_id not in movs.keys():
if mov_id not in list(movs.keys()):
movs[mov_id] = cnt_i
cnt_i += 1
n_users = len(users)
@@ -163,12 +163,12 @@ def write_movie_data(ratings, data_path, output, seed):
f.close()

f = open(os.path.join(output, 'user_dict'), 'wb')
import cPickle
cPickle.dump(users, f)
import pickle
pickle.dump(users, f)
f.close()

f = open(os.path.join(output, 'movie_dict'), 'wb')
cPickle.dump(movs, f)
pickle.dump(movs, f)
f.close()


@@ -183,28 +183,28 @@ def main(data_path, output, seed):
write_movie_data(ratings, data_path, output, seed)

if __name__ == "__main__":
# main("/Users/yin.zheng/Downloads/ml-1m",
# "/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-0",
# main("./../ml-1m",
# "./../ml_datasets/MovieLens1M-shuffle-itembased-0",
# 1234)
print '1'
main("/Users/yin.zheng/Downloads/ml-1m",
"/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-1",
print('1')
main("./../ml-1m",
"./../ml_datasets/MovieLens1M-shuffle-itembased-1",
2341)
print '2'
main("/Users/yin.zheng/Downloads/ml-1m",
"/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-2",
print('2')
main("./../ml-1m",
"./../ml_datasets/MovieLens1M-shuffle-itembased-2",
3412)
print '3'
main("/Users/yin.zheng/Downloads/ml-1m",
"/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-3",
print('3')
main("./../ml-1m",
"./../ml_datasets/MovieLens1M-shuffle-itembased-3",
4123)
print '4'
main("/Users/yin.zheng/Downloads/ml-1m",
"/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased-4",
print('4')
main("./../ml-1m",
"./../ml_datasets/MovieLens1M-shuffle-itembased-4",
1324)
# from fuel.datasets import H5PYDataset
#
# trainset = H5PYDataset(os.path.join('/Users/yin.zheng/ml_datasets/MovieLens1M-shuffle-itembased', 'movielens-1m.hdf5'),
# trainset = H5PYDataset(os.path.join('./../ml_datasets/MovieLens1M-shuffle-itembased', 'movielens-1m.hdf5'),
# which_sets = ('train',),
# load_in_memory=True,
# sources=('input_ratings', 'output_ratings', 'input_masks', 'output_masks')
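
The hunks above amount to a Python 2 to 3 port (cPickle to pickle, print statements to print calls) plus a switch to relative dataset paths. For context, here is a standalone sketch of the id-remapping pattern write_movie_data relies on, assuming (user_id, movie_id, rating) triples as in the script; the sample triples are made up:

    import pickle

    # Raw MovieLens ids are sparse; map each to a contiguous 0-based index on first sight.
    ratings = [(7, 1193, 5), (7, 661, 3), (42, 1193, 4)]  # hypothetical (user_id, movie_id, rating)

    users, movs = {}, {}
    for user_id, mov_id, _rating in ratings:
        if user_id not in users:
            users[user_id] = len(users)
        if mov_id not in movs:
            movs[mov_id] = len(movs)

    print(len(users), 'users,', len(movs), 'movies')

    # Persist the lookup tables the way the script does, as binary pickles.
    with open('user_dict', 'wb') as f:
        pickle.dump(users, f)
    with open('movie_dict', 'wb') as f:
        pickle.dump(movs, f)

Note that membership tests work directly on a dict in Python 3, so "user_id not in users" is enough; the list(users.keys()) wrapper in the diff is correct but builds a throwaway list on every lookup.
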