SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
NewtonSVM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Harshit Syal
8  * Copyright (C) 2012 Harshit Syal
9  */
10 #include <shogun/lib/config.h>
11 
12 #ifdef HAVE_LAPACK
17 #include <shogun/labels/Labels.h>
20 
21 //#define DEBUG_NEWTON
22 //#define V_NEWTON
23 using namespace shogun;
24 
26 : CLinearMachine(), C(1), use_bias(true)
27 {
28 }
29 
30 CNewtonSVM::CNewtonSVM(float64_t c, CDotFeatures* traindat, CLabels* trainlab, int32_t itr)
32 {
33  lambda=1/c;
34  num_iter=itr;
35  prec=1e-6;
36  num_iter=20;
37  use_bias=true;
38  C=c;
39  set_features(traindat);
40  set_labels(trainlab);
41 }
42 
43 
// Destructor (signature line stripped from this extract). Empty body: the
// trainer frees its own scratch buffers, and feature/label references are
// presumably released by the base class — verify against CLinearMachine.
45 {
46 }
47 
48 
// train_machine: train a linear SVM in the primal with Newton's method.
// Each iteration: compute objective/gradient/support vectors
// (obj_fun_linear), build the (x_d+1)x(x_d+1) regularized Hessian from the
// support-vector features, pseudo-invert it, take the Newton step with a 1D
// line search, and stop when twice the Newton decrement falls below
// prec*obj. On exit w (length x_d) and the bias (weights[x_d]) are stored.
// NOTE(review): several original lines were stripped from this extract
// (49, 51-52, 61, 106, 110, 116, 189) — e.g. the declaration of 'sgv' used
// at line 112 (presumably features->get_computed_dot_feature_vector(sv[k]),
// per the index below) is among them, so read with care.
50 {
53 
54  if (data)
55  {
// Only dot-product-capable features can be used by this trainer.
56  if (!data->has_property(FP_DOT))
57  SG_ERROR("Specified features are not of type CDotFeatures\n")
58  set_features((CDotFeatures*) data);
59  }
60 
62 
63  SGVector<float64_t> train_labels=((CBinaryLabels*) m_labels)->get_labels();
64  int32_t num_feat=features->get_dim_feature_space();
65  int32_t num_vec=features->get_num_vectors();
66 
67  //Assigning dimensions for whole class scope
68  x_n=num_vec;
69  x_d=num_feat;
70 
71  ASSERT(num_vec==train_labels.vlen)
72 
// weights holds [w; b]: x_d feature weights plus one bias slot, zero-init.
73  float64_t* weights = SG_CALLOC(float64_t, x_d+1);
74  float64_t* out=SG_MALLOC(float64_t, x_n);
76 
77  int32_t *sv=SG_MALLOC(int32_t, x_n), size_sv=0, iter=0;
78  float64_t obj, *grad=SG_MALLOC(float64_t, x_d+1);
79  float64_t t;
80 
81  while(1)
82  {
83  iter++;
84 
85  if (iter>num_iter)
86  {
87  SG_PRINT("Maximum number of Newton steps reached. Try larger lambda")
88  break;
89  }
90 
// Objective value, gradient and the active (margin-violating) set.
91  obj_fun_linear(weights, out, &obj, sv, &size_sv, grad);
92 
93 #ifdef DEBUG_NEWTON
94  SG_PRINT("fun linear passed !\n")
95  SG_PRINT("Obj =%f\n", obj)
96  SG_PRINT("Grad=\n")
97 
98  for (int32_t i=0; i<x_d+1; i++)
99  SG_PRINT("grad[%d]=%.16g\n", i, grad[i])
100  SG_PRINT("SV=\n")
101 
102  for (int32_t i=0; i<size_sv; i++)
103  SG_PRINT("sv[%d]=%d\n", i, sv[i])
104 #endif
105 
// Dense copy of the support-vector feature vectors, one per column slot of
// size x_d. NOTE(review): 'sgv' below is declared on a stripped line (110).
107  float64_t* Xsv = SG_MALLOC(float64_t, x_d*size_sv);
108  for (int32_t k=0; k<size_sv; k++)
109  {
111  for (int32_t j=0; j<x_d; j++)
112  Xsv[k*x_d+j]=sgv.vector[j];
113  }
// tx/ty appear unused here; line 116 (stripped) presumably called
// transpose_matrix(Xsv, tx, ty) — see the index below. TODO confirm.
114  int32_t tx=x_d;
115  int32_t ty=size_sv;
117 
118 #ifdef DEBUG_NEWTON
119  SGMatrix<float64_t>::display_matrix(Xsv, x_d, size_sv);
120 #endif
121 
// diag(lambda,...,lambda,0): the bias row/column is not regularized.
122  float64_t* lcrossdiag=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
123  float64_t* vector=SG_MALLOC(float64_t, x_d+1);
124 
125  for (int32_t i=0; i<x_d; i++)
126  vector[i]=lambda;
127 
128  vector[x_d]=0;
129 
130  SGMatrix<float64_t>::create_diagonal_matrix(lcrossdiag, vector, x_d+1);
// Xsv2 = Xsv^T * Xsv (x_d x x_d Gram block over the support vectors).
131  float64_t* Xsv2=SG_MALLOC(float64_t, x_d*x_d);
132  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, x_d, x_d, size_sv,
133  1.0, Xsv, size_sv, Xsv, size_sv, 0.0, Xsv2, x_d);
// Column sums of Xsv: the Hessian's bias-coupling terms.
134  float64_t* sum=SG_CALLOC(float64_t, x_d);
135 
136  for (int32_t j=0; j<x_d; j++)
137  {
138  for (int32_t i=0; i<size_sv; i++)
139  sum[j]+=Xsv[i+j*size_sv];
140  }
141 
// Assemble the full (x_d+1)x(x_d+1) Hessian: [Xsv2 sum; sum^T size_sv].
142  float64_t* Xsv2sum=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
143 
144  for (int32_t i=0; i<x_d; i++)
145  {
146  for (int32_t j=0; j<x_d; j++)
147  Xsv2sum[j*(x_d+1)+i]=Xsv2[j*x_d+i];
148 
149  Xsv2sum[x_d*(x_d+1)+i]=sum[i];
150  }
151 
152  for (int32_t j=0; j<x_d; j++)
153  Xsv2sum[j*(x_d+1)+x_d]=sum[j];
154 
155  Xsv2sum[x_d*(x_d+1)+x_d]=size_sv;
156  float64_t* identity_matrix=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
157 
158  SGVector<float64_t>::fill_vector(vector, x_d+1, 1.0);
159 
// Xsv2sum += lcrossdiag * I, i.e. add the regularization diagonal.
160  SGMatrix<float64_t>::create_diagonal_matrix(identity_matrix, vector, x_d+1);
161  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, x_d+1, x_d+1,
162  x_d+1, 1.0, lcrossdiag, x_d+1, identity_matrix, x_d+1, 1.0,
163  Xsv2sum, x_d+1);
164 
// Newton direction: step = -pinv(Hessian) * grad.
165  float64_t* inverse=SG_MALLOC(float64_t, (x_d+1)*(x_d+1));
166  int32_t r=x_d+1;
167  SGMatrix<float64_t>::pinv(Xsv2sum, r, r, inverse);
168 
169  float64_t* step=SG_MALLOC(float64_t, r);
170  float64_t* s2=SG_MALLOC(float64_t, r);
171  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, r, 1, r, 1.0,
172  inverse, r, grad, r, 0.0, s2, r);
173 
174  for (int32_t i=0; i<r; i++)
175  step[i]=-s2[i];
176 
// 1D line search along the Newton direction; updates out, returns t.
177  line_search_linear(weights, step, out, &t);
178 
179 #ifdef DEBUG_NEWTON
180  SG_PRINT("t=%f\n\n", t)
181 
182  for (int32_t i=0; i<x_n; i++)
183  SG_PRINT("out[%d]=%.16g\n", i, out[i])
184 
185  for (int32_t i=0; i<x_d+1; i++)
186  SG_PRINT("weights[%d]=%.16g\n", i, weights[i])
187 #endif
188 
// NOTE(review): line 189 (stripped) presumably applied the step to weights,
// e.g. vec1_plus_scalar_times_vec2(weights, t, step, r) — TODO confirm.
// Newton decrement = -0.5 * step^T * grad (convergence measure).
190  float64_t newton_decrement;
191  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, r, -0.5,
192  step, r, grad, r, 0.0, &newton_decrement, 1);
193 #ifdef V_NEWTON
194  SG_PRINT("Itr=%d, Obj=%f, No of sv=%d, Newton dec=%0.3f, line search=%0.3f\n\n",
195  iter, obj, size_sv, newton_decrement, t);
196 #endif
197 
198  SG_FREE(Xsv);
199  SG_FREE(vector);
200  SG_FREE(lcrossdiag);
201  SG_FREE(Xsv2);
202  SG_FREE(Xsv2sum);
203  SG_FREE(identity_matrix);
204  SG_FREE(inverse);
205  SG_FREE(step);
206  SG_FREE(s2);
207 
// Converged when 2 * newton_decrement < prec * obj.
208  if (newton_decrement*2<prec*obj)
209  break;
210  }
211 
212 #ifdef V_NEWTON
213  SG_PRINT("FINAL W AND BIAS Vector=\n\n")
214  CMath::display_matrix(weights, x_d+1, 1);
215 #endif
216 
// Publish the learned weights; the last entry of weights is the bias.
217  set_w(SGVector<float64_t>(weights, x_d));
218  set_bias(weights[x_d]);
219 
220  SG_FREE(sv);
221  SG_FREE(grad);
222  SG_FREE(out);
223 
224  return true;
225 
226 
227 }
228 
// line_search_linear: exact 1D Newton line search along direction d.
// Finds step size t minimizing the primal objective along weights + t*d,
// updates the margin outputs 'out' in place and returns t through tx.
// @param weights current [w; b] vector (length x_d+1)
// @param d       search direction (length x_d+1)
// @param out     in/out: per-sample outputs 1 - y_i * f(x_i) (length x_n)
// @param tx      out: the chosen step size t
// NOTE(review): original line 246 is stripped here; per the index below it
// presumably added the bias component of d to Xd via add_scalar — confirm.
229 void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t*
230  out, float64_t* tx)
231 {
232  SGVector<float64_t> Y=((CBinaryLabels*) m_labels)->get_labels();
233  float64_t* outz=SG_MALLOC(float64_t, x_n);
234  float64_t* temp1=SG_MALLOC(float64_t, x_n);
235  float64_t* temp1forout=SG_MALLOC(float64_t, x_n);
236  float64_t* outzsv=SG_MALLOC(float64_t, x_n);
237  float64_t* Ysv=SG_MALLOC(float64_t, x_n);
238  float64_t* Xsv=SG_MALLOC(float64_t, x_n);
// temp2 is allocated and freed but never used in the visible code.
239  float64_t* temp2=SG_MALLOC(float64_t, x_d);
240  float64_t t=0.0;
241  float64_t* Xd=SG_MALLOC(float64_t, x_n);
242 
// Xd[i] = <x_i, d> : projection of every sample onto the search direction.
243  for (int32_t i=0; i<x_n; i++)
244  Xd[i]=features->dense_dot(i, d, x_d);
245 
247 
248 #ifdef DEBUG_NEWTON
249  CMath::display_vector(d, x_d+1, "Weight vector");
250 
251  for (int32_t i=0; i<x_d+1; i++)
252  SG_SPRINT("Xd[%d]=%.18g\n", i, Xd[i])
253 
254  CMath::display_vector(Xd, x_n, "XD vector=");
255 #endif
256 
// Direction-invariant scalars: wd = lambda * <w, d>, dd = lambda * <d, d>.
257  float64_t wd;
258  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda,
259  weights, x_d, d, x_d, 0.0, &wd, 1);
260  float64_t tempg, dd;
261  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda, d,
262  x_d, d, x_d, 0.0, &dd, 1);
263 
264  float64_t g, h;
265  int32_t sv_len=0, *sv=SG_MALLOC(int32_t, x_n);
266 
// 1D Newton iteration on t until the decrement g^2/h is negligible.
267  do
268  {
// outz = out - t * (Y .* Xd): outputs after a trial step of size t.
269  SGVector<float64_t>::vector_multiply(temp1, Y.vector, Xd, x_n);
270  memcpy(temp1forout, temp1, sizeof(float64_t)*x_n);
271  SGVector<float64_t>::scale_vector(t, temp1forout, x_n);
272  SGVector<float64_t>::add(outz, 1.0, out, -1.0, temp1forout, x_n);
273 
274  // Calculation of sv
275  sv_len=0;
276 
277  for (int32_t i=0; i<x_n; i++)
278  {
279  if (outz[i]>0)
280  sv[sv_len++]=i;
281  }
282 
283  //Calculation of gradient 'g'
284  for (int32_t i=0; i<sv_len; i++)
285  {
286  outzsv[i]=outz[sv[i]];
287  Ysv[i]=Y.vector[sv[i]];
288  Xsv[i]=Xd[sv[i]];
289  }
290 
291  memset(temp1, 0, sizeof(float64_t)*sv_len);
292  SGVector<float64_t>::vector_multiply(temp1, outzsv, Ysv, sv_len);
293  tempg=CMath::dot(temp1, Xsv, sv_len);
294  g=wd+(t*dd);
295  g-=tempg;
296 
297  // Calculation of second derivative 'h'
298  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, sv_len, 1.0,
299  Xsv, sv_len, Xsv, sv_len, 0.0, &h, 1);
300  h+=dd;
301 
302  // Calculation of 1D Newton step 'd'
303  t-=g/h;
304 
// Converge when the decrement g^2/h drops below the fixed tolerance.
305  if (((g*g)/h)<1e-10)
306  break;
307 
308  } while(1);
309 
// Commit the outputs at the final step size and report t to the caller.
310  for (int32_t i=0; i<x_n; i++)
311  out[i]=outz[i];
312  *tx=t;
313 
314  SG_FREE(sv);
315  SG_FREE(temp1);
316  SG_FREE(temp2);
317  SG_FREE(temp1forout);
318  SG_FREE(outz);
319  SG_FREE(outzsv);
320  SG_FREE(Ysv);
321  SG_FREE(Xsv);
322  SG_FREE(Xd);
323 }
324 
// obj_fun_linear: primal objective, gradient and active set for the current
// weights.
// Computes obj = 0.5*sum(max(out,0)^2) + 0.5*lambda*||w||^2 (bias not
// penalized), grad = lambda*w0 - [X^T*(out.*y); sum(out.*y)], and fills sv
// with the indices of samples whose clamped output is positive.
// @param weights current [w; b] (length x_d+1)
// @param out     in/out: per-sample outputs; negatives are clamped to 0 here
// @param obj     out: objective value
// @param sv      out: indices of margin-violating samples
// @param numsv   out: number of entries written to sv
// @param grad    out: gradient (length x_d+1)
// NOTE(review): original lines 359 and 363 are stripped from this extract.
325 void CNewtonSVM::obj_fun_linear(float64_t* weights, float64_t* out,
326  float64_t* obj, int32_t* sv, int32_t* numsv, float64_t* grad)
327 {
328  SGVector<float64_t> v=((CBinaryLabels*) m_labels)->get_labels();
329 
// Hinge: only positive slack contributes, so clamp negatives to zero.
330  for (int32_t i=0; i<x_n; i++)
331  {
332  if (out[i]<0)
333  out[i]=0;
334  }
335 
336 #ifdef DEBUG_NEWTON
337  for (int32_t i=0; i<x_n; i++)
338  SG_SPRINT("out[%d]=%.16g\n", i, out[i])
339 #endif
340 
341  //create copy of w0
342  float64_t* w0=SG_MALLOC(float64_t, x_d+1);
343  memcpy(w0, weights, sizeof(float64_t)*(x_d));
344  w0[x_d]=0; //do not penalize b
345 
346  //create copy of out
347  float64_t* out1=SG_MALLOC(float64_t, x_n);
348 
349  //compute steps for obj
// p1 = 0.5 * sum(out.^2): squared-hinge loss term.
350  SGVector<float64_t>::vector_multiply(out1, out, out, x_n);
351  float64_t p1=SGVector<float64_t>::sum(out1, x_n)/2;
// C1 = 0.5 * lambda * <w0, w0>: regularization term (bias excluded).
352  float64_t C1;
353  float64_t* w0copy=SG_MALLOC(float64_t, x_d+1);
354  memcpy(w0copy, w0, sizeof(float64_t)*(x_d+1));
355  SGVector<float64_t>::scale_vector(0.5, w0copy, x_d+1);
356  cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d+1, lambda,
357  w0, x_d+1, w0copy, x_d+1, 0.0, &C1, 1);
358  *obj=p1+C1;
360  float64_t* temp=SG_CALLOC(float64_t, x_n); //temp = out.*Y
361  SGVector<float64_t>::vector_multiply(temp, out, v.vector, x_n);
362  float64_t* temp1=SG_CALLOC(float64_t, x_d);
364 
// temp1 = sum_i temp[i] * x_i, accumulated feature-wise.
365  for (int32_t i=0; i<x_n; i++)
366  {
367  features->add_to_dense_vec(temp[i], i, temp1, x_d);
368 #ifdef DEBUG_NEWTON
369  SG_SPRINT("\ntemp[%d]=%f", i, temp[i])
370  CMath::display_vector(vec.vector, x_d, "vector");
371  CMath::display_vector(temp1, x_d, "debuging");
372 #endif
373  }
// p2 = [X^T*(out.*y); sum(out.*y)], then grad = lambda*w0 - p2.
// NOTE(review): w0 was scaled by lambda via the dgemm alpha above only for
// C1; grad uses w0 directly here, so lambda presumably equals 1/C scaling
// applied elsewhere — verify against the Newton update in train_machine.
374  float64_t* p2=SG_MALLOC(float64_t, x_d+1);
375 
376  for (int32_t i=0; i<x_d; i++)
377  p2[i]=temp1[i];
378 
379  p2[x_d]=SGVector<float64_t>::sum(temp, x_n);
380  SGVector<float64_t>::add(grad, 1.0, w0, -1.0, p2, x_d+1);
// Active set: samples with positive (clamped) output violate the margin.
381  int32_t sv_len=0;
382 
383  for (int32_t i=0; i<x_n; i++)
384  {
385  if (out[i]>0)
386  sv[sv_len++]=i;
387  }
388 
389  *numsv=sv_len;
390 
391  SG_FREE(w0);
392  SG_FREE(w0copy);
393  SG_FREE(out1);
394  SG_FREE(temp);
395  SG_FREE(temp1);
396  SG_FREE(p2);
397 }
398 #endif //HAVE_LAPACK
void display_matrix(const char *name="matrix") const
Definition: SGMatrix.cpp:394
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:223
virtual ELabelType get_label_type() const =0
binary labels +1/-1
Definition: LabelTypes.h:18
virtual bool train_machine(CFeatures *data=NULL)
Definition: NewtonSVM.cpp:49
virtual void set_w(const SGVector< float64_t > src_w)
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)=0
virtual int32_t get_num_vectors() const =0
static void create_diagonal_matrix(T *matrix, T *v, int32_t size)
Definition: SGMatrix.cpp:295
CLabels * m_labels
Definition: Machine.h:361
#define SG_ERROR(...)
Definition: SGIO.h:129
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)=0
float64_t lambda
Definition: NewtonSVM.h:119
Features that support dot products among other operations.
Definition: DotFeatures.h:44
virtual int32_t get_dim_feature_space() const =0
static void scale_vector(T alpha, T *vec, int32_t len)
Scale vector inplace.
Definition: SGVector.cpp:822
static void transpose_matrix(T *&matrix, int32_t &num_feat, int32_t &num_vec)
Definition: SGMatrix.cpp:277
static float64_t * pinv(float64_t *matrix, int32_t rows, int32_t cols, float64_t *target=NULL)
Definition: SGMatrix.cpp:846
index_t vlen
Definition: SGVector.h:494
#define SG_PRINT(...)
Definition: SGIO.h:137
#define SG_SPRINT(...)
Definition: SGIO.h:180
#define ASSERT(x)
Definition: SGIO.h:201
virtual ~CNewtonSVM()
Definition: NewtonSVM.cpp:44
static void vector_multiply(T *target, const T *v1, const T *v2, int32_t len)
Compute vector multiplication.
Definition: SGVector.h:326
static void add_scalar(T alpha, T *vec, int32_t len)
Add scalar to vector inplace.
Definition: SGVector.h:344
double float64_t
Definition: common.h:50
virtual void set_features(CDotFeatures *feat)
static T sum(T *vec, int32_t len)
Return sum(vec)
Definition: SGVector.h:354
Class LinearMachine is a generic interface for all kinds of linear machines like classifiers.
Definition: LinearMachine.h:63
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
Definition: SGVector.cpp:531
float64_t prec
Definition: NewtonSVM.h:120
CDotFeatures * features
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
SGVector< float64_t > get_computed_dot_feature_vector(int32_t num)
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
virtual void set_bias(float64_t b)
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
virtual void set_labels(CLabels *lab)
Definition: Machine.cpp:73
void add(const SGVector< T > x)
Definition: SGVector.cpp:281

SHOGUN Machine Learning Toolbox - Documentation