-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclust_knn.cpp
More file actions
executable file
·283 lines (232 loc) · 7.46 KB
/
clust_knn.cpp
File metadata and controls
executable file
·283 lines (232 loc) · 7.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
#include "clust_knn.h"
#include "misc.h"
using namespace myglut;
// Construct a classifier for a raster of _NRow x _NCol samples.
// The distance-function selector defaults to 0 (only Euclidean is wired up).
clust_knn::clust_knn(int _NRow, int _NCol)
    : NRow(_NRow), NCol(_NCol), select_distance_function(0)
{
}
// Re-run initialization against the already-loaded buffers with a new
// decimation factor. Passes re_init = true so init() reuses the buffers
// allocated on the first call instead of reallocating them.
void clust_knn::reinit(int _nskip){
    printf("%sclust_knn::reinit %snskip %s%d %sKNN_USE %s%d%s\n", KMAG, KGRN, KRED, _nskip, KGRN, KRED, KNN_USE, KNRM);
    clust_knn::init(myglut3d, myglut2d, float_buffers, _nskip, true);
}
// Squared Euclidean distance between decimated points i and j across all
// N feature dimensions. The square root is deliberately omitted: callers
// only rank/sort distances, and sqrt preserves ordering.
// Aborts the program (after dumping the offending coordinate pairs) if the
// accumulated value is NaN or infinite.
float clust_knn::euclidean_distance(int i, int j){
    int m;
    float d, tmp;
    d = 0.;
    for0(m, N){
        tmp = dat.at(j, m) - dat.at(i, m);
        d += tmp * tmp;
    }
    // isnan(-d) == isnan(d) and isinf(-d) == isinf(d), so the original's
    // extra checks on the negation were redundant and have been dropped.
    if(isnan(d) || isinf(d)){
        printf("euclidean_distance bad: i %d j %d\n", i, j);
        for0(m, N) printf("\tdjm %f dim %f\n", dat.at(j, m), dat.at(i, m));
        exit(1);
    }
    return d;
}
// Dispatch point for the inter-sample metric. Currently always squared
// Euclidean distance; select_distance_function (set to 0 in the
// constructor) is not consulted here yet.
float clust_knn::distance(int i, int j){
    // there were other distance functions in here.. put them back?
    return euclidean_distance(i, j);
}
// Convenience overload: first-time initialization (re_init = false), so
// all working buffers are freshly allocated by the 5-argument init().
void clust_knn::init(GLUT3d * _my3d, GLUT2d * _my2d, vector < SA<float> * > * _float_buffers, int nskip){
    init(_my3d, _my2d, _float_buffers, nskip, false);
}
//calculate nj from N and nskip
// Full initialization:
//   1) record the display handles and input buffers,
//   2) (first call only) allocate working arrays and decimate the input
//      rasters, keeping every nskip-th pixel (nj points, N bands each),
//   3) fill the per-point sorted distance rows (first call only),
//   4) copy the first KNN_MAX (index, distance) pairs of each sorted row
//      into the compact neighbour tables nnIndex / nnD.
// NOTE(review): on re_init the distance rows from the previous run are
// reused while nj is recomputed from the new nskip -- confirm reinit is
// only ever called with the same effective nskip, or D_j may be mis-sized.
void clust_knn::init(GLUT3d * _my3d, GLUT2d * _my2d, vector < SA<float> * > * _float_buffers, int nskip, bool re_init){
    printf("\tre_init %d\n", (int)re_init);
    if(!re_init) D_j = NULL;
    myglut3d = _my3d;
    myglut2d = _my2d;
    n_skip = nskip;
    float_buffers = _float_buffers;
    // allocate
    int n = (float_buffers->at(0))->size();  // pixels per band
    N = float_buffers->size();               // number of bands / feature dims
    nj = n / nskip; // fixed for this invocation of the program
    int h, i, j, k, m;
    if(!re_init){
        D_j = (SAS<float> **)(void *)malloc(sizeof(SAS<float> *) * nj);
        for0(i, nj) D_j[i] = new SAS<float>(nj); // dmat row (could be more ragged)
        // only allocate memory the first time
        dE.init(nj);
        dat.init(nj, N);
        nnD.init(nj, KNN_MAX);
        origIndex.init(nj);
        nnIndex.init(nj, KNN_MAX);
        printf("%sReducing data...%s\n", KGRN, KNRM);
        // decimate: use 1/nskip of data
        k = h = 0;
        for0(i, NRow){
            for0(j, NCol){
                // NOTE(review): this break exits only the inner (column) loop;
                // the row loop keeps spinning. Harmless since k cannot advance
                // further, but confirm it wasn't meant to stop both loops.
                if(k >= (nj - 1)) break;
                if((h % nskip) == 0){
                    origIndex.at(k) = h; // for the decimated data point, store it's index wrt the original data so we could retrieve the values.
                    for0(m, N){
                        dat.at(k, m) = float_buffers->at(m)->at(i, j);
                        // NOTE(review): indexing i_coord/j_coord by the band index m
                        // (not the point index k) looks suspicious -- each band
                        // overwrites the previous entry. Confirm whether [k] was intended.
                        (*i_coord)[m] = i;
                        (*j_coord)[m] = j;
                    }
                    k++;
                }
                h++;
            }
        }
    }
    printf("calculating sorted truncated distance matrix..\n");
    if(!re_init) distance_calculation();  // parallel fill + sort of every D_j row
    // truncate each sorted row to its KNN_MAX nearest entries
    for0(j, nj){
        int ind;
        SAS<float> * D = D_j[j];
        for0(i, KNN_MAX){
            ind = D->index(i);
            nnIndex.at(j, i) = ind;
            nnD.at(j, i) = D->f(i);
        }
    }
    printf("end init..\n");
}
// Build the full distance matrix: one sorted row per decimated point.
// Free function (not a member) because parfor needs a plain function
// pointer; it reaches the instance through the global myclust_knn.
// &distance_calculation resolves to the size_t-overload worker below.
void distance_calculation(){
    // run distance calculation in parallel
    parfor(0, myclust_knn->nj, &distance_calculation);
}
// Worker body for one row j of the distance matrix: compute the distance
// from every point i to point j into D_j[j], validate each value, then
// sort the row so nearest neighbours come first.
// Progress is printed roughly every 5% of rows.
void distance_calculation(size_t j){
    SAS<float> * D; // distances w.r.t. a given point
    float d;
    int i, nj;
    nj = myclust_knn->nj;
    // BUGFIX: nj / 20 is 0 when nj < 20, making j % dispfact a division
    // by zero (undefined behavior). Clamp to 1 (print every row instead).
    int dispfact = nj / 20;
    if(dispfact < 1) dispfact = 1;
    D = myclust_knn->D_j[j];
    D->reset();
    if(j % dispfact == 0){
        printf("%s\n(%s%d%s/%s%d%s)%s<==>%s(%s%d%s/%s100%s)%s", KGRN, KRED, (int)(j + 1), KMAG, KRED, nj, KGRN, KMAG, KGRN, KRED, (int)(100. * ((float)(j + 1)) / ((float)nj)), KMAG, KRED, KGRN, KNRM);
    }
    for0(i, nj){
        d = myclust_knn->distance(i, j);
        D->f(i) = d;
        // isnan/isinf are sign-symmetric, so checking -d as well was redundant
        if(isnan(d) || isinf(d)){
            printf("bad data\n");
            exit(1);
        }
    }
    D->Sort();
}
// Density proxy for point j: the negated sum of its KNN_USE nearest
// neighbour distances. Larger (less negative) values mean the neighbours
// are closer, i.e. the point sits in a denser region.
// Aborts the program (after printing each distance) if the result is
// NaN or infinite.
float clust_knn::densityEstimate(int j){
    const int K = KNN_USE;
    float total = 0.;
    int i;
    for0(i, K) total += nnD.at(j, i);
    const float estimate = -total;
    if(isnan(estimate) || isinf(estimate)){
        // dump the individual neighbour distances before bailing out
        float d, sumd;
        sumd = 0.;
        printf("bad densityEstimate j %d K %d\n", j, K);
        for0(i, K){
            d = nnD.at(j, i);
            sumd += d;
            printf("\td %f\n", d);
        }
        exit(1);
    }
    return estimate;
}
// Assign a cluster label to point j by walking uphill: follow the chain of
// highest-density neighbours until hitting either an already-labelled point
// (reuse its label) or a local density maximum (a "hill top", which founds a
// new cluster and is recorded in knn_J_indices). Returns the label; the
// caller stores it per point.
int clust_knn::classf( int j, SA<int> * highestdensityneighborindex, SA<float> * highestdensityneighbordensity){
    const int debug = false;
    if(debug) printf("classf j %d\n", j);

    // Guard: point was labelled by an earlier walk -- reuse that label.
    if(knn_indices.at(j) >= 0){
        if(debug) printf("\talready classed\n");
        return knn_indices.at(j);
    }

    // Guard: no neighbour is denser than j itself -- j is a hill top,
    // so it seeds a brand-new cluster.
    if(highestdensityneighbordensity->at(j) <= dE.at(j)){
        knn_J_indices.push_back(j); // store the index of the hilltop, wrt to the decimated data.
        knn_indices.at(j) = nkci; // store the index of the new cluster, wrt to the decimated data point.
        if(debug)printf("\tat top: j %d nkci %d\n", j, nkci);
        return(nkci++);
    }

    // Otherwise climb one step toward the peak and inherit its label.
    int nextJ = highestdensityneighborindex->at(j);
    if(debug) printf("\t move to peak: hdnd %f nextJ %d\n", (float) highestdensityneighbordensity->at(nextJ), nextJ);
    return classf(nextJ, highestdensityneighborindex, highestdensityneighbordensity);
}
// Density-peak style clustering over the precomputed k-NN tables:
//   1) estimate a density for every decimated point (negated distance sum),
//   2) for each point, find its highest-density neighbour among its K nearest,
//   3) follow those links uphill (classf); each local maximum founds a cluster,
//   4) publish cluster centres and per-class membership to the 3d/2d views.
// Side effects: fills knn_indices, knn_J_indices, classmembers, dE,
// n_knn_centres / number_of_classes, and mutates myglut3d / myglut2d state.
void clust_knn::knn_clustering(){
    printf("start knn clustering...\n");
    // sanity: can't use more neighbours than were stored per point in init()
    if(KNN_USE > KNN_MAX){
        printf("K %d KMax %d\n", KNN_USE, KNN_MAX);
        printf("Error: you have selected K>K_Max.\n");
        exit(1);
    }
    int j,i;
    // per-point density estimate (larger value == denser neighbourhood)
    for0(j, nj) dE[j] = densityEstimate(j);
    if(false){
        // debug printout for density estimates
        for0(j, nj) printf("%f ", dE[j]);
        printf("\n");
    }
    knn_indices.init(nj); //these are the labels for the clustering.
    SA<int> hdni(nj); //highest density neighbor index.
    SA<float> hdnd(nj); //highest density neighbor density.
    SAS <float> D(KNN_USE); //list the densities of the neighbors of the current point.
    int ni; //neighbor index.
    int chdni; //current highest density neighbor index.
    // sorted list of density estimates of neighbours
    for0(j, nj){
        D.reset();
        for0(i, KNN_USE){
            ni = nnIndex.at(j,i);
            D.f(i) = dE[ni];
        }
        D.Sort(); // increasing order
        // last (largest) entry is the densest neighbour; map its position in
        // the sorted list back to the actual neighbour index
        chdni = D.index(KNN_USE - 1);
        chdni = nnIndex.at(j, chdni);
        hdni[j] = chdni;
        hdnd[j] = D.f(KNN_USE - 1);
    }
    knn_J_indices.clear();
    nkci = 0; // next cluster index to hand out (incremented by classf)
    // run the classification
    for0(j, nj) knn_indices[j] = -1; // -1 == not yet labelled
    for0(j, nj) knn_indices[j] = classf(j, &(hdni), &(hdnd));
    // how many centres?
    n_knn_centres = nkci;
    number_of_classes = n_knn_centres;
    printf("Number of classes: %d\n", nkci);
    // publish the N-dimensional coordinates of each hill top under the 3d lock
    myglut3d->lock = true;
    int N = float_buffers->size();
    myglut3d->class_centres.clear();
    for0(i, knn_J_indices.size()){
        int myind = knn_J_indices.at(i);
        for0(j, N) myglut3d->class_centres.push_back(dat.at(myind, j));
    }
    // calculate class membership
    classmembers.clear();
    for0(i, n_knn_centres){
        vector<unsigned int> a;
        classmembers.push_back(a);
    }
    for0(j, nj) classmembers[knn_indices[j]].push_back(j);
    myglut3d->lock = false;
    myglut2d->unlock();
    myglut2d->mark();
    myglut3d->mark();
}
// Number of clusters found by the most recent knn_clustering() run.
int clust_knn::get_n_knn_centres(){
    const int n_classes = n_knn_centres;
    return n_classes;
}
int clust_knn::get_n_knn_elements(int centre_index){
return (classmembers[centre_index]).size(); // number of dat elements for a given class
}
// coord'th feature value of the elem_ind'th member of cluster centre_index.
float clust_knn::get_centre_coord(int centre_index, int elem_ind, int coord){
    const unsigned int point = (classmembers[centre_index])[elem_ind];
    return dat.at(point, coord);
}
// coord'th feature value of the cluster's representative ("hill top") point.
float clust_knn::get_centre_coord( int centre_index, int coord){
    const int hilltop = knn_J_indices.at(centre_index);
    return dat.at(hilltop, coord);
}