-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcluster_old.h
More file actions
122 lines (96 loc) · 2.83 KB
/
cluster_old.h
File metadata and controls
122 lines (96 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#ifndef CLUSTER_ANALYSIS_CLUSTER_H
#define CLUSTER_ANALYSIS_CLUSTER_H
#include <map>
#include <stdexcept>
#include <cstring>
#include <math.h>
#include "utils/utils.h"
#include "matrix/Matrix.h"
#include <algorithm>
using std::vector;
using std::map;
using std::invalid_argument;
using std::sort;
namespace Cluster{
/**
 * K-means clustering.
 *
 * @param m input matrix (presumably holds the data points to cluster — TODO confirm layout)
 * @param k presumably the number of clusters to produce — confirm against the implementation
 * @return a map from size_t keys to vectors of point pointers
 *         (presumably cluster label -> points in that cluster — confirm)
 */
map<size_t, vector<double*>> kMeans(Matrix &m, int k);
/**
 * Bisecting k-means clustering.
 *
 * @param m input matrix (presumably holds the data points to cluster — TODO confirm layout)
 * @param k presumably the target number of clusters — confirm against the implementation
 * @return a map from size_t keys to vectors of point pointers
 *         (presumably cluster label -> points in that cluster — confirm)
 */
map<size_t, vector<double*>> binaryKMeans(Matrix &m, int k);
/**
 * Picks the cluster with the largest SSE and splits it into two.
 *
 * @param kPoints the clusters, modified in place (taken by non-const reference)
 * @param currentK presumably the number of clusters before the split — TODO confirm
 */
void splitKPoints(map<size_t, vector<double*>> &kPoints, int currentK);
/**
 * Splits one cluster into two: the points are divided evenly into two
 * groups along the dimension with the largest standard deviation.
 *
 * @param points the points of the cluster to split
 * @return pointer to vector(s) of point pointers
 *         NOTE(review): presumably an array of two vectors, one per half;
 *         who frees it is not visible from this header — confirm.
 */
vector<double*>* split(vector<double*> &points);
/**
 * Selects K points to act as representative points; the original comment
 * says the selection method is decided by "parameter f".
 * NOTE(review): there is no parameter named f in this signature — it
 * presumably refers to the selection hook Option::selF; confirm.
 *
 * @param m input matrix to pick the points from
 * @param k number of representative points to select
 * @return a map from size_t keys to vectors of point pointers
 *         (presumably one entry per selected point — confirm)
 */
map<size_t, vector<double*>> selectKPoints(Matrix &m, int k);
/**
 * One implementation of selecting k points: picks k points at random.
 *
 * @param m input matrix to pick the points from
 * @param k number of points to pick
 * @return the selected points as a vector of point pointers
 */
vector<double*> selectKPointsByRandom(Matrix &m, int k);
/**
 * Computes the distance between two points; the original comment says the
 * distance metric is decided by "parameter p".
 * NOTE(review): there is no parameter named p in this signature — it
 * presumably refers to the distance hook Option::disF; confirm.
 *
 * @param a first point
 * @param b second point
 * @return the distance between a and b
 */
double getDistance(double* a, double* b);
/**
 * Computes the Euclidean distance.
 *
 * @param a first point
 * @param b second point
 * @return the Euclidean distance between a and b
 *         NOTE(review): the number of coordinates read is not passed in;
 *         presumably taken from the configured Option::D — confirm.
 */
double calED(double* a, double* b);
/**
 * Determines which of the k points `point` is closest to, and assigns
 * `point` to the map accordingly.
 *
 * @param point the point to classify
 * @param kPoints the map the point is assigned into (modified in place)
 * @return an int (presumably the key of the chosen cluster — TODO confirm)
 */
int markPoint(double* point, map<size_t, vector<double*>> &kPoints);
/**
 * Reassigns every point in the matrix.
 *
 * @param m matrix whose points are reassigned
 * @param kPoints the cluster map (presumably receives the new assignments — confirm)
 */
void markAllPoints(Matrix &m, map<size_t, vector<double*>> &kPoints);
/**
 * Computes the centroid (barycenter) of this set of points.
 *
 * @param points the points to average over
 * @return pointer to the centroid's coordinates
 *         NOTE(review): allocation and ownership of the returned buffer
 *         are not visible from this header — confirm.
 */
double* calBarycenter(vector<double*> &points);
/**
 * Updates the barycenter of every group in the map and returns the sum of
 * the distances between each barycenter before and after the update.
 *
 * @param kPoints the cluster map, modified in place
 * @return sum over all groups of the distance each barycenter moved
 */
double updateKPoints(map<size_t, vector<double*>> &kPoints);
/**
 * Computes the SSE of a single cluster.
 *
 * @param points the points of the cluster
 * @return the cluster's SSE
 */
double errFunc(vector<double*> &points);
/**
 * Computes the SSE of all clusters.
 *
 * @param kPoints the cluster map
 * @return the SSE over all clusters (presumably the sum of the
 *         per-cluster values — TODO confirm)
 */
double totalErrFunc(map<size_t, vector<double*>> &kPoints);
/**
 * Gets the label of the cluster with the largest SSE.
 *
 * @param kPoints the cluster map
 * @return the label of the cluster whose SSE is largest
 */
int getMaxSSECluster(map<size_t, vector<double*>> &kPoints);
/**
* 聚类算法的配置
*/
class Option{
public:
int D;
double (*disF)(double* a, double* b);
vector<double*> (*selF)(Matrix &m, int t);
double* (*barF)(vector<double*> &points);
Option(int d){
this->D = d;
this->disF = calED;
this->selF = selectKPointsByRandom;
this->barF = calBarycenter;
}
void setDisF(double (*p)(double* a, double* b));
void setSelF(vector<double*> (*p)(Matrix &m, int t));
void setBarF(double* (*p)(vector<double*> &points));
};
/**
 * Takes a pointer to a clustering configuration.
 * NOTE(review): presumably installs `op` as the configuration used by the
 * functions in this namespace; the definition is not visible here, so
 * confirm this and who owns `op` afterwards.
 *
 * @param op the configuration object
 */
void setOption(Option *op);
}
#endif //CLUSTER_ANALYSIS_CLUSTER_H