-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathfastrp.py
More file actions
89 lines (79 loc) · 3.46 KB
/
fastrp.py
File metadata and controls
89 lines (79 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import csv
import itertools
import math
import matplotlib
import time
import logging
import sys
import os
import random
import warnings
import gensim
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
from collections import Counter, defaultdict
from pathlib import Path
from sklearn import random_projection
from sklearn.preprocessing import normalize, scale, MultiLabelBinarizer
from scipy.sparse import coo_matrix, csr_matrix, csc_matrix, spdiags
# projection method: choose from Gaussian and Sparse
# input matrix: choose from adjacency and transition matrix
# alpha adjusts the weighting of nodes according to their degree
def fastrp_projection(A, q=3, dim=128, projection_method='gaussian', input_matrix='adj', alpha=None):
    """Randomly project a graph matrix and propagate the result through its powers.

    Parameters
    ----------
    A : square sparse matrix
        Adjacency matrix of the graph (``A.shape[0]`` is used as the node count).
    q : int
        Highest power of the input matrix to use. The returned list holds the
        base projection plus one entry per additional power (``q - 1`` of them
        when ``q >= 2``).
    dim : int
        Number of components of the random projection.
    projection_method : {'gaussian', 'sparse'}
        Selects ``GaussianRandomProjection`` or ``SparseRandomProjection``.
    input_matrix : {'adj', 'trans'}
        ``'adj'`` projects ``A`` itself; ``'trans'`` projects ``A`` with every
        row divided by its row sum.
    alpha : float or None
        When given, the fitted projection matrix is right-multiplied by
        ``diag(rowsum(A) ** alpha)`` before it is applied.

    Returns
    -------
    list
        ``[U_1, U_2, ...]`` where ``U_1`` is the projection of the input matrix
        ``M`` and ``U_k = M @ U_{k-1}``.
    """
    assert input_matrix == 'adj' or input_matrix == 'trans'
    assert projection_method == 'gaussian' or projection_method == 'sparse'

    # The node count is needed by both the 'trans' normalisation and the alpha
    # re-weighting, so it must be defined regardless of which branch runs.
    N = A.shape[0]

    degrees = None
    if input_matrix == 'trans' or alpha is not None:
        # Row sums as a plain 1 x N array: one stored diagonal for spdiags.
        degrees = np.asarray(A.sum(axis=1)).reshape(1, -1)

    if input_matrix == 'adj':
        M = A
    else:
        # NOTE: a row whose sum is zero produces an infinite diagonal entry here.
        normalizer = spdiags(1.0 / degrees, 0, N, N)
        M = normalizer @ A

    # The random state is fixed so repeated runs give the same projection.
    if projection_method == 'gaussian':
        transformer = random_projection.GaussianRandomProjection(n_components=dim, random_state=42)
    else:
        transformer = random_projection.SparseRandomProjection(n_components=dim, random_state=42)
    transformer.fit(M)

    if alpha is not None:
        # Re-weight the columns of the projection matrix by degree ** alpha.
        transformer.components_ = transformer.components_ @ spdiags(
            np.power(degrees, alpha), 0, N, N)

    cur_U = transformer.transform(M)
    U_list = [cur_U]
    for _ in range(q - 1):
        cur_U = M @ cur_U
        U_list.append(cur_U)
    return U_list
# When weights is None, concatenate the embeddings from different powers of A instead of linearly combining them
def fastrp_merge(U_list, weights, normalization=False):
    """Merge the per-power embeddings into a single embedding matrix.

    Parameters
    ----------
    U_list : list of matrices
        Embeddings to merge, one per power. Sparse matrices of any
        scipy format are converted to dense arrays first.
    weights : sequence of float or None
        When None, the embeddings are concatenated column-wise and returned
        as-is. Otherwise the embeddings are summed, each multiplied by its
        weight, and the sum is passed through ``scale``.
    normalization : bool
        When true, every embedding is L2-normalised row-wise before merging.

    Returns
    -------
    numpy.ndarray
        The concatenated embeddings (``weights is None``) or the scaled
        weighted sum.
    """
    from scipy.sparse import issparse

    # Densify with toarray() rather than todense(): the latter yields
    # np.matrix, which recent scikit-learn releases refuse to accept.
    dense_U_list = [_U.toarray() if issparse(_U) else np.asarray(_U) for _U in U_list]

    if normalization:
        _U_list = [normalize(_U, norm='l2', axis=1) for _U in dense_U_list]
    else:
        _U_list = dense_U_list

    if weights is None:
        return np.concatenate(_U_list, axis=1)

    U = np.zeros_like(_U_list[0])
    for cur_U, weight in zip(_U_list, weights):
        U += cur_U * weight
    return scale(U)
# A is always the adjacency matrix
# the choice between adj matrix and trans matrix is decided in the conf
def fastrp_wrapper(A, conf):
    """Run the projection and the merge step from a single configuration dict.

    Parameters
    ----------
    A : sparse matrix
        Adjacency matrix; whether it is used directly or row-normalised is
        decided by ``conf['input_matrix']``.
    conf : dict
        Required keys: ``'weights'``, ``'dim'``, ``'projection_method'``,
        ``'input_matrix'``, ``'normalization'``. Optional keys: ``'alpha'``
        (treated as None when absent) and ``'q'`` (only read when
        ``conf['weights']`` is None).

    Returns
    -------
    The merged embedding produced by ``fastrp_merge``.
    """
    weights = conf['weights']
    projection_kwargs = dict(
        dim=conf['dim'],
        projection_method=conf['projection_method'],
        input_matrix=conf['input_matrix'],
        # alpha is optional, consistent with how get_emb_filename reads it.
        alpha=conf['alpha'] if 'alpha' in conf else None,
    )
    if weights is not None:
        # One projected matrix per weight.
        projection_kwargs['q'] = len(weights)
    elif 'q' in conf:
        # Concatenation mode has no weights to count, so take q from the
        # configuration; without it fastrp_projection uses its own default.
        projection_kwargs['q'] = conf['q']

    U_list = fastrp_projection(A, **projection_kwargs)
    return fastrp_merge(U_list, weights, conf['normalization'])
def get_emb_filename(prefix, conf):
    """Build the ``.mat`` file name that encodes an embedding configuration.

    Parameters
    ----------
    prefix : str
        Leading part of the file name.
    conf : dict
        Must contain ``'dim'``, ``'projection_method'``, ``'input_matrix'``,
        ``'normalization'`` and ``'weights'`` (which may be None). ``'alpha'``
        and ``'C'`` are optional: a missing ``'alpha'`` is written as an empty
        string and a missing ``'C'`` as ``'1.0'``.

    Returns
    -------
    str
        ``<prefix>-dim=...,projection_method=...,...,alpha=...,C=....mat``
    """
    weights = conf['weights']
    weights_str = ','.join(map(str, weights)) if weights is not None else 'None'
    alpha_str = str(conf['alpha']) if 'alpha' in conf else ''
    # The presence of 'C' itself decides whether the default is used.
    c_str = str(conf['C']) if 'C' in conf else '1.0'

    return ''.join([
        prefix,
        '-dim=', str(conf['dim']),
        ',projection_method=', conf['projection_method'],
        ',input_matrix=', conf['input_matrix'],
        ',normalization=', str(conf['normalization']),
        ',weights=', weights_str,
        ',alpha=', alpha_str,
        ',C=', c_str,
        '.mat',
    ])