SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
NewtonSVM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Harshit Syal
8  * Copyright (C) 2012 Harshit Syal
9  */
10 #include <shogun/lib/config.h>
11 
12 #ifdef HAVE_LAPACK
17 #include <shogun/labels/Labels.h>
20 #include <shogun/lib/Signal.h>
21 
22 //#define DEBUG_NEWTON
23 //#define V_NEWTON
24 using namespace shogun;
25 
27 : CLinearMachine(), C(1), use_bias(true)
28 {
29 }
30 
31 CNewtonSVM::CNewtonSVM(float64_t c, CDotFeatures* traindat, CLabels* trainlab, int32_t itr)
33 {
34  lambda=1/c;
35  num_iter=itr;
36  prec=1e-6;
37  num_iter=20;
38  use_bias=true;
39  C=c;
40  set_features(traindat);
41  set_labels(trainlab);
42 }
43 
44 
46 {
47 }
48 
49 
// Trains the linear SVM in the primal via damped Newton iterations
// (cf. O. Chapelle, "Training a Support Vector Machine in the Primal"):
// each iteration evaluates objective/gradient/support set, assembles a
// regularized (x_d+1)x(x_d+1) Hessian over the support vectors, obtains
// the Newton direction through a pseudo-inverse, and line-searches along
// it.  On success the learned weights go to set_w()/set_bias().
//
// NOTE(review): this listing comes from a doxygen dump that dropped
// several original lines (50, 52-54, 63, 77, 83, 108, 112, 118, 191) —
// among them the header `bool CNewtonSVM::train_machine(CFeatures* data)`,
// the label ASSERTs, the initialization of out[], the main loop's `while`
// line, the declaration of `sgv`, and the Xsv transpose call.  Confirm
// all of these against the upstream source.
51 {
55 
56  if (data)
57  {
 // Caller may pass any CFeatures; Newton training requires dot products.
58  if (!data->has_property(FP_DOT))
59  SG_ERROR("Specified features are not of type CDotFeatures\n")
60  set_features((CDotFeatures*) data);
61  }
62 
64 
 // Binary +1/-1 labels, one per training vector.
65  SGVector<float64_t> train_labels=((CBinaryLabels*) m_labels)->get_labels();
66  int32_t num_feat=features->get_dim_feature_space();
67  int32_t num_vec=features->get_num_vectors();
68 
69  //Assigning dimensions for whole class scope
70  x_n=num_vec;
71  x_d=num_feat;
72 
73  ASSERT(num_vec==train_labels.vlen)
74 
 // weights holds [w; b]: x_d weight entries plus one bias, zero-initialized.
75  float64_t* weights = SG_CALLOC(float64_t, x_d+1);
 // out holds per-sample residuals.  NOTE(review): its initialization
 // (original line 77) is missing from this dump — presumably filled before
 // the first obj_fun_linear() call, which reads out[i]; verify upstream.
76  float64_t* out=SG_MALLOC(float64_t, x_n);
78 
79  int32_t *sv=SG_MALLOC(int32_t, x_n), size_sv=0, iter=0;
80  float64_t obj, *grad=SG_MALLOC(float64_t, x_d+1);
81  float64_t t;
82 
 // Main Newton loop (its loop header, original line 83, was dropped).
84  {
85  iter++;
86 
87  if (iter>num_iter)
88  {
89  SG_PRINT("Maximum number of Newton steps reached. Try larger lambda")
90  break;
91  }
92 
 // Objective value, gradient, and current support-vector index set.
93  obj_fun_linear(weights, out, &obj, sv, &size_sv, grad);
94 
95 #ifdef DEBUG_NEWTON
96  SG_PRINT("fun linear passed !\n")
97  SG_PRINT("Obj =%f\n", obj)
98  SG_PRINT("Grad=\n")
99 
100  for (int32_t i=0; i<x_d+1; i++)
101  SG_PRINT("grad[%d]=%.16g\n", i, grad[i])
102  SG_PRINT("SV=\n")
103 
104  for (int32_t i=0; i<size_sv; i++)
105  SG_PRINT("sv[%d]=%d\n", i, sv[i])
106 #endif
107 
 // Gather the support vectors column-wise into Xsv (x_d x size_sv).
 // NOTE(review): `sgv` holds each SV's dense feature vector; its
 // declaration (original line 112, presumably via
 // get_computed_dot_feature_vector(sv[k])) was dropped by the dump.
109  float64_t* Xsv = SG_MALLOC(float64_t, x_d*size_sv);
110  for (int32_t k=0; k<size_sv; k++)
111  {
113  for (int32_t j=0; j<x_d; j++)
114  Xsv[k*x_d+j]=sgv.vector[j];
115  }
 // tx/ty feed a transpose of Xsv (original line 118, dropped here), after
 // which the dgemm below treats Xsv as size_sv x x_d column-major.
116  int32_t tx=x_d;
117  int32_t ty=size_sv;
119 
120 #ifdef DEBUG_NEWTON
121  SGMatrix<float64_t>::display_matrix(Xsv, x_d, size_sv);
122 #endif
123 
 // Diagonal lambda*I block for the weight part of the Hessian; the bias
 // is not regularized, so its diagonal entry is 0.
124  float64_t* lcrossdiag=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
125  float64_t* vector=SG_MALLOC(float64_t, x_d+1);
126 
127  for (int32_t i=0; i<x_d; i++)
128  vector[i]=lambda;
129 
130  vector[x_d]=0;
131 
132  SGMatrix<float64_t>::create_diagonal_matrix(lcrossdiag, vector, x_d+1);
 // Xsv2 = Xsv' * Xsv: x_d x x_d Gram block over the support vectors.
133  float64_t* Xsv2=SG_MALLOC(float64_t, x_d*x_d);
134  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, x_d, x_d, size_sv,
135  1.0, Xsv, size_sv, Xsv, size_sv, 0.0, Xsv2, x_d);
 // Column sums of Xsv: cross terms between the weights and the bias.
 // NOTE(review): `sum` is never released with the other scratch buffers
 // below — it leaks once per Newton iteration.
136  float64_t* sum=SG_CALLOC(float64_t, x_d);
137 
138  for (int32_t j=0; j<x_d; j++)
139  {
140  for (int32_t i=0; i<size_sv; i++)
141  sum[j]+=Xsv[i+j*size_sv];
142  }
143 
 // Assemble the bordered Hessian [Xsv2 sum; sum' size_sv], (x_d+1)^2.
144  float64_t* Xsv2sum=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
145 
146  for (int32_t i=0; i<x_d; i++)
147  {
148  for (int32_t j=0; j<x_d; j++)
149  Xsv2sum[j*(x_d+1)+i]=Xsv2[j*x_d+i];
150 
151  Xsv2sum[x_d*(x_d+1)+i]=sum[i];
152  }
153 
154  for (int32_t j=0; j<x_d; j++)
155  Xsv2sum[j*(x_d+1)+x_d]=sum[j];
156 
157  Xsv2sum[x_d*(x_d+1)+x_d]=size_sv;
 // Add the regularizer: Xsv2sum += lcrossdiag * I (beta=1.0 accumulates).
 // NOTE(review): multiplying by an explicit identity is a no-op detour; a
 // plain element-wise add would avoid two O(d^2) allocations.
158  float64_t* identity_matrix=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
159 
160  SGVector<float64_t>::fill_vector(vector, x_d+1, 1.0);
161 
162  SGMatrix<float64_t>::create_diagonal_matrix(identity_matrix, vector, x_d+1);
163  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, x_d+1, x_d+1,
164  x_d+1, 1.0, lcrossdiag, x_d+1, identity_matrix, x_d+1, 1.0,
165  Xsv2sum, x_d+1);
166 
 // Newton direction: step = -pinv(Hessian) * grad.
167  float64_t* inverse=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
168  int32_t r=x_d+1;
169  SGMatrix<float64_t>::pinv(Xsv2sum, r, r, inverse);
170 
171  float64_t* step=SG_MALLOC(float64_t, r);
172  float64_t* s2=SG_MALLOC(float64_t, r);
173  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, r, 1, r, 1.0,
174  inverse, r, grad, r, 0.0, s2, r);
175 
176  for (int32_t i=0; i<r; i++)
177  step[i]=-s2[i];
178 
 // 1-D Newton line search along `step`; refreshes out[] and returns t.
 // NOTE(review): no weights += t*step is visible here — the update
 // (possibly vec1_plus_scalar_times_vec2 at original line 191, dropped
 // by the dump) must be confirmed upstream.
179  line_search_linear(weights, step, out, &t);
180 
181 #ifdef DEBUG_NEWTON
182  SG_PRINT("t=%f\n\n", t)
183 
184  for (int32_t i=0; i<x_n; i++)
185  SG_PRINT("out[%d]=%.16g\n", i, out[i])
186 
187  for (int32_t i=0; i<x_d+1; i++)
188  SG_PRINT("weights[%d]=%.16g\n", i, weights[i])
189 #endif
190 
 // Newton decrement = -0.5 * step' * grad, the convergence measure.
192  float64_t newton_decrement;
193  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, r, -0.5,
194  step, r, grad, r, 0.0, &newton_decrement, 1);
195 #ifdef V_NEWTON
196  SG_PRINT("Itr=%d, Obj=%f, No of sv=%d, Newton dec=%0.3f, line search=%0.3f\n\n",
197  iter, obj, size_sv, newton_decrement, t);
198 #endif
199 
 // Release per-iteration scratch buffers before the convergence test.
200  SG_FREE(Xsv);
201  SG_FREE(vector);
202  SG_FREE(lcrossdiag);
203  SG_FREE(Xsv2);
204  SG_FREE(Xsv2sum);
205  SG_FREE(identity_matrix);
206  SG_FREE(inverse);
207  SG_FREE(step);
208  SG_FREE(s2);
209 
 // Stop when 2*decrement < prec * objective (relative tolerance).
210  if (newton_decrement*2<prec*obj)
211  break;
212  }
213 
214 #ifdef V_NEWTON
215  SG_PRINT("FINAL W AND BIAS Vector=\n\n")
216  CMath::display_matrix(weights, x_d+1, 1);
217 #endif
218 
 // Publish w (first x_d entries) and bias (last entry).
 // NOTE(review): the SGVector wraps `weights` with length x_d although
 // x_d+1 floats were allocated — check the size/ownership contract of
 // set_w() against upstream.
219  set_w(SGVector<float64_t>(weights, x_d));
220  set_bias(weights[x_d]);
221 
222  SG_FREE(sv);
223  SG_FREE(grad);
224  SG_FREE(out);
225 
226  return true;
227 
228 
229 }
230 
// 1-D Newton line search: finds a step size t along direction d starting
// from t=0, driven by the derivative g and curvature h of the objective
// restricted to the line.  On return out[] holds the residuals at the
// accepted t (outz) and *tx the chosen step size.
//
// NOTE(review): original line 248 is missing from this dump — per the
// doxygen cross-references it likely added the direction's bias component
// to every Xd[i] via SGVector::add_scalar; confirm upstream.
231 void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t*
232  out, float64_t* tx)
233 {
234  SGVector<float64_t> Y=((CBinaryLabels*) m_labels)->get_labels();
235  float64_t* outz=SG_MALLOC(float64_t, x_n);
236  float64_t* temp1=SG_MALLOC(float64_t, x_n);
237  float64_t* temp1forout=SG_MALLOC(float64_t, x_n);
238  float64_t* outzsv=SG_MALLOC(float64_t, x_n);
239  float64_t* Ysv=SG_MALLOC(float64_t, x_n);
240  float64_t* Xsv=SG_MALLOC(float64_t, x_n);
 // NOTE(review): temp2 is allocated and freed but never used in this body.
241  float64_t* temp2=SG_MALLOC(float64_t, x_d);
242  float64_t t=0.0;
243  float64_t* Xd=SG_MALLOC(float64_t, x_n);
244 
 // Xd[i] = x_i . d: projection of each sample onto the search direction.
245  for (int32_t i=0; i<x_n; i++)
246  Xd[i]=features->dense_dot(i, d, x_d);
247 
249 
250 #ifdef DEBUG_NEWTON
251  CMath::display_vector(d, x_d+1, "Weight vector");
252 
253  for (int32_t i=0; i<x_d+1; i++)
254  SG_SPRINT("Xd[%d]=%.18g\n", i, Xd[i])
255 
256  CMath::display_vector(Xd, x_n, "XD vector=");
257 #endif
258 
 // t-independent terms of the 1-D derivative, computed once:
 // wd = lambda * w.d and dd = lambda * d.d (1x1 dgemm dot products).
259  float64_t wd;
260  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda,
261  weights, x_d, d, x_d, 0.0, &wd, 1);
262  float64_t tempg, dd;
263  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda, d,
264  x_d, d, x_d, 0.0, &dd, 1);
265 
266  float64_t g, h;
267  int32_t sv_len=0, *sv=SG_MALLOC(int32_t, x_n);
268 
 // 1-D Newton iteration on t until the decrement g^2/h drops below 1e-10.
269  do
270  {
 // outz = out - t * (Y .* Xd): residuals at the trial step size t.
271  SGVector<float64_t>::vector_multiply(temp1, Y.vector, Xd, x_n);
272  sg_memcpy(temp1forout, temp1, sizeof(float64_t)*x_n);
273  SGVector<float64_t>::scale_vector(t, temp1forout, x_n);
274  SGVector<float64_t>::add(outz, 1.0, out, -1.0, temp1forout, x_n);
275 
276  // Calculation of sv
277  sv_len=0;
278 
 // Only samples with positive residual contribute (active hinge set).
279  for (int32_t i=0; i<x_n; i++)
280  {
281  if (outz[i]>0)
282  sv[sv_len++]=i;
283  }
284 
285  //Calculation of gradient 'g'
 // Compact the active samples' residuals, labels, and projections.
286  for (int32_t i=0; i<sv_len; i++)
287  {
288  outzsv[i]=outz[sv[i]];
289  Ysv[i]=Y.vector[sv[i]];
290  Xsv[i]=Xd[sv[i]];
291  }
292 
 // g = wd + t*dd - sum_{active} outz*y*Xd.
293  memset(temp1, 0, sizeof(float64_t)*sv_len);
294  SGVector<float64_t>::vector_multiply(temp1, outzsv, Ysv, sv_len);
295  tempg=CMath::dot(temp1, Xsv, sv_len);
296  g=wd+(t*dd);
297  g-=tempg;
298 
299  // Calculation of second derivative 'h'
 // h = dd + sum_{active} Xd[i]^2 (1x1 dgemm dot product).
300  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, sv_len, 1.0,
301  Xsv, sv_len, Xsv, sv_len, 0.0, &h, 1);
302  h+=dd;
303 
304  // Calculation of 1D Newton step 'd'
305  t-=g/h;
306 
307  if (((g*g)/h)<1e-10)
308  break;
309 
310  } while(1);
311 
 // Commit the converged residuals and step size to the caller.
312  for (int32_t i=0; i<x_n; i++)
313  out[i]=outz[i];
314  *tx=t;
315 
316  SG_FREE(sv);
317  SG_FREE(temp1);
318  SG_FREE(temp2);
319  SG_FREE(temp1forout);
320  SG_FREE(outz);
321  SG_FREE(outzsv);
322  SG_FREE(Ysv);
323  SG_FREE(Xsv);
324  SG_FREE(Xd);
325 }
326 
// Computes the primal objective, its gradient, and the support-vector
// index set at the current point.
//   out[i]   (in/out): residuals; negative entries are clamped to 0
//            on entry (squared-hinge loss only counts positive ones).
//   *obj     (out): 0.5*sum(out.^2) + 0.5*lambda*w0'w0 (bias unpenalized).
//   sv/*numsv(out): indices with out[i] > 0 and their count.
//   grad     (out): w0 - p2, where p2 = [X'*(out.*y); sum(out.*y)].
//            NOTE(review): the objective scales the regularizer by lambda
//            (dgemm alpha below) but the gradient uses coefficient 1.0 on
//            w0 — confirm the intended scaling against upstream.
// NOTE(review): original lines 361 and 365 are absent from this dump
// (likely a blank line and a debug-only declaration); confirm upstream.
327 void CNewtonSVM::obj_fun_linear(float64_t* weights, float64_t* out,
328  float64_t* obj, int32_t* sv, int32_t* numsv, float64_t* grad)
329 {
330  SGVector<float64_t> v=((CBinaryLabels*) m_labels)->get_labels();
331 
 // Hinge clamp: only positive residuals contribute to loss and gradient.
332  for (int32_t i=0; i<x_n; i++)
333  {
334  if (out[i]<0)
335  out[i]=0;
336  }
337 
338 #ifdef DEBUG_NEWTON
339  for (int32_t i=0; i<x_n; i++)
340  SG_SPRINT("out[%d]=%.16g\n", i, out[i])
341 #endif
342 
343  //create copy of w0
344  float64_t* w0=SG_MALLOC(float64_t, x_d+1);
345  sg_memcpy(w0, weights, sizeof(float64_t)*(x_d));
346  w0[x_d]=0; //do not penalize b
347 
348  //create copy of out
349  float64_t* out1=SG_MALLOC(float64_t, x_n);
350 
351  //compute steps for obj
 // p1 = 0.5 * sum(out.^2): squared-hinge loss term.
352  SGVector<float64_t>::vector_multiply(out1, out, out, x_n);
353  float64_t p1=SGVector<float64_t>::sum(out1, x_n)/2;
 // C1 = lambda * w0' * (0.5*w0) = 0.5*lambda*||w0||^2 via a 1x1 dgemm.
354  float64_t C1;
355  float64_t* w0copy=SG_MALLOC(float64_t, x_d+1);
356  sg_memcpy(w0copy, w0, sizeof(float64_t)*(x_d+1));
357  SGVector<float64_t>::scale_vector(0.5, w0copy, x_d+1);
358  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d+1, lambda,
359  w0, x_d+1, w0copy, x_d+1, 0.0, &C1, 1);
360  *obj=p1+C1;
362  float64_t* temp=SG_CALLOC(float64_t, x_n); //temp = out.*Y
363  SGVector<float64_t>::vector_multiply(temp, out, v.vector, x_n);
364  float64_t* temp1=SG_CALLOC(float64_t, x_d);
366 
 // temp1 accumulates X' * (out .* y): data term of the gradient.
367  for (int32_t i=0; i<x_n; i++)
368  {
369  features->add_to_dense_vec(temp[i], i, temp1, x_d);
370 #ifdef DEBUG_NEWTON
371  SG_SPRINT("\ntemp[%d]=%f", i, temp[i])
372  CMath::display_vector(vec.vector, x_d, "vector");
373  CMath::display_vector(temp1, x_d, "debuging");
374 #endif
375  }
 // p2 = [temp1; sum(temp)]; then grad = w0 - p2.
376  float64_t* p2=SG_MALLOC(float64_t, x_d+1);
377 
378  for (int32_t i=0; i<x_d; i++)
379  p2[i]=temp1[i];
380 
381  p2[x_d]=SGVector<float64_t>::sum(temp, x_n);
382  SGVector<float64_t>::add(grad, 1.0, w0, -1.0, p2, x_d+1);
383  int32_t sv_len=0;
384 
 // Active set: samples with positive clamped residual.
385  for (int32_t i=0; i<x_n; i++)
386  {
387  if (out[i]>0)
388  sv[sv_len++]=i;
389  }
390 
391  *numsv=sv_len;
392 
393  SG_FREE(w0);
394  SG_FREE(w0copy);
395  SG_FREE(out1);
396  SG_FREE(temp);
397  SG_FREE(temp1);
398  SG_FREE(p2);
399 }
400 #endif //HAVE_LAPACK
void display_matrix(const char *name="matrix") const
Definition: SGMatrix.cpp:480
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:264
virtual ELabelType get_label_type() const =0
binary labels +1/-1
Definition: LabelTypes.h:18
virtual bool train_machine(CFeatures *data=NULL)
Definition: NewtonSVM.cpp:50
virtual void set_w(const SGVector< float64_t > src_w)
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)=0
virtual int32_t get_num_vectors() const =0
static void create_diagonal_matrix(T *matrix, T *v, int32_t size)
Definition: SGMatrix.cpp:375
CLabels * m_labels
Definition: Machine.h:365
#define SG_ERROR(...)
Definition: SGIO.h:128
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)=0
float64_t lambda
Definition: NewtonSVM.h:119
Features that support dot products among other operations.
Definition: DotFeatures.h:44
virtual int32_t get_dim_feature_space() const =0
static void scale_vector(T alpha, T *vec, int32_t len)
Scale vector inplace.
Definition: SGVector.cpp:863
static void transpose_matrix(T *&matrix, int32_t &num_feat, int32_t &num_vec)
Definition: SGMatrix.cpp:358
static float64_t * pinv(float64_t *matrix, int32_t rows, int32_t cols, float64_t *target=NULL)
Definition: SGMatrix.cpp:933
index_t vlen
Definition: SGVector.h:545
#define SG_PRINT(...)
Definition: SGIO.h:136
#define SG_SPRINT(...)
Definition: SGIO.h:179
#define ASSERT(x)
Definition: SGIO.h:200
virtual ~CNewtonSVM()
Definition: NewtonSVM.cpp:45
static void clear_cancel()
Definition: Signal.cpp:126
static void vector_multiply(T *target, const T *v1, const T *v2, int32_t len)
Compute vector multiplication.
Definition: SGVector.h:364
static void add_scalar(T alpha, T *vec, int32_t len)
Add scalar to vector inplace.
Definition: SGVector.h:382
double float64_t
Definition: common.h:60
virtual void set_features(CDotFeatures *feat)
static T sum(T *vec, int32_t len)
Return sum(vec)
Definition: SGVector.h:392
Class LinearMachine is a generic interface for all kinds of linear machines like classifiers.
Definition: LinearMachine.h:63
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:622
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
Definition: SGVector.cpp:571
static bool cancel_computations()
Definition: Signal.h:111
float64_t prec
Definition: NewtonSVM.h:120
CDotFeatures * features
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
T sum(const Container< T > &a, bool no_diag=false)
The class Features is the base class of all feature objects.
Definition: Features.h:68
SGVector< float64_t > get_computed_dot_feature_vector(int32_t num)
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
virtual void set_bias(float64_t b)
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
virtual void set_labels(CLabels *lab)
Definition: Machine.cpp:65
void add(const SGVector< T > x)
Definition: SGVector.cpp:310

SHOGUN Machine Learning Toolbox - Documentation