forked from deerishi/Machine-learning-algorithms
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsupport_vector_machine_train.m
More file actions
166 lines (137 loc) · 4.48 KB
/
support_vector_machine_train.m
File metadata and controls
166 lines (137 loc) · 4.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
function [model] = svmTrain(X, Y, C, kernelFunction, ...
tol, max_passes)
%SVMTRAIN Trains an SVM classifier using a simplified version of the SMO
%algorithm.
% [model] = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes) trains an
% SVM classifier and returns trained model. X is the matrix of training
% examples. Each row is a training example, and the jth column holds the
% jth feature. Y is a column matrix containing 1 for positive examples
% and 0 for negative examples. C is the standard SVM regularization
% parameter. tol is a tolerance value used for determining equality of
% floating point numbers. max_passes controls the number of iterations
% over the dataset (without changes to alpha) before the algorithm quits.
%tol = 1e-3;
eps=1e-5;
max_passes = 10; %the number of max passes are done as we want that a minimum of that many number of passes in which no alpha has changed
m=size(X,1);
n=size(X,2);
alpha=zeros(m,1);
b=0;
E=zeros(m,1);
passes=0;
L=0;
H=0;
W=zeros(1,m);
number_of_changed_alphas=0;
%in this first svm training algo we will select he alpas randonmly
while(passes<max_passes)
for i=1:m
wx=0;
for h=1:m
k=kernal(X(i),X(h),kernelFunction);%kernal function is defined below
wx=wx+ alpha(h)*Y(h)*k; % this is w'x term
end
E(i)=b+wx-Y(i);
if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0))
while(1)
j=randi(m,1);
if(j~=i)
break
end
end
wx=0;
for h=1:m
k=kernal(X(j),X(h),kernelFunction);%kernal function is defined below
wx=wx+ alpha(h)*Y(h)*k
end
E(j)=b+wx-Y(j);
%saving the old alphas
alpha1=aplha(i);
alpha2=alpha(j)
%finding the upper and lower bounds for alphas
if(Y(i)==Y(j))
L=max(0,alpha1+alpha2-C);
H=min(C,alpha1+alpha2);
else
L=max(0,alpha2-alpha1);
H=min(C,C+alpha2-alpha1);
end
if(L==H)
continue;
end
s=Y(i)*Y(j);
k1=kernal(X(i),X(i),kernelFunction);
k2=kernal(X(j),X(j),kernelFunction);
k3=kernal(X(i),X(j),kernelFunction);
eta=k1+k2-2*k3;
if(eta>0)
alpha(j)=alpha(j)+ Y(j)*((E(i)-E(j))/eta);
if(alpha(j)>=H)
alpha(j)=H;
elseif(alpha(j)<=L)
alpha(j)=L;
else
alpha(j)=alpha(j);
end
else(eta<=0)
f1=Y(i)*(E(i)+b)-alpha(i)*k1-s*alpha(j)*k3;
f2=Y(j)*(E(j)+b)-s*alpha(i)*k3-alpha(j)*k2;
L1=alpha(i)+s*(alpha(j)-L);
H1=alpha(i)+s*(alpha(j)-H);
obj_L=L1*f1+L*f2+(0.5*L1*L1*k1)+(0.5*L*L*k2)+s*L*L1*k3;
obj_H=H1*f1+H*f2+(0.5*H1*L1*k1)+(0.5*H*H*k2)+s*H*H1*k3;
if(obj_L<obj_H-eps)
alpha(j)=L;
elseif(obj_L>obj_H+eps)
alpha(j)=H;
else
alpha(j)=alpha(j);
end
end
if(abs(alpha(j)-alpha2)<eps)
alpha(j)=alpha2;
continue;%because there is no point going further and calculating alpha1
end
alpha(i)=alpha1+s*(alpha2-alpha(j));
%now we update the b paramaters
b1=E(i)+Y(i)*(alpha(i)-alpha1)*k1+Y(j)*(alpha(j)-alpha2)*k3+b;
b2=E(j)+Y(i)*(alpha(i)-alpha1)*k3+Y(j)*(alpha(j)-alpha2)*k2+b;
if(0<alpha(i)<C && 0<alpha(j)<C)
b=(b1+b2)/2;
elseif(0<alpha(i)<C)
b=b1;
elseif(0<alpha(j)<C)
b=b2;
end
%now we update the weight parameters taking only into account linear
%SVM's
W=W+Y(i)*(alpha(i)-alpha1)*X(i)+Y(j)*(alpha(j)-alpha2)*X(j)
number_of_changed_alphas=number_of_changed_alphas+1;
end
end
if(number_of_changed_alphas==0)
passes=passes+1;
else
passes=0;
end
end
model.W=W;
model.alpha=alpha;
model.b=b;
end
function [kernal_value]=kernal(x1,x2,kernelFunction)
ktmp=0
if(strcmp('linearKernel',kernelFunction)==1)
ktmp=x1'*x2;
elseif(strcmp('gaussianKernel',kernelFunction)==1)
sigma=10;
[m n]=size(x1);
for i=1:n
u=(x1(m,i)-x2(m,i))*(x1(m,i)-x2(m,i));
ktmp=ktmp+exp((-1*u*0.5)/(sigma*sigma));
end
else%default kernal function is linear kernal
ktmp=x1'*x2;
end
kernal_value=ktmp;
end