SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
libbmrm.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * libbmrm.h: Implementation of the BMRM solver for SO training
8  *
9  * Copyright (C) 2012 Michal Uricar, uricamic@cmp.felk.cvut.cz
10  *
11  * Implementation of the BMRM solver
12  *--------------------------------------------------------------------- */
13 
15 #ifdef USE_GPL_SHOGUN
16 #include <shogun/lib/external/libqp.h>
17 #include <shogun/lib/Time.h>
19 
20 #include <climits>
21 #include <limits>
22 
23 namespace shogun
24 {
25 static const uint32_t QPSolverMaxIter=0xFFFFFFFF;
26 static const float64_t epsilon=0.0;
27 
28 static float64_t *H;
29 uint32_t BufSize;
30 
31 void add_cutting_plane(
32  bmrm_ll** tail,
33  bool* map,
34  float64_t* A,
35  uint32_t free_idx,
36  float64_t* cp_data,
37  uint32_t dim)
38 {
39  REQUIRE(map[free_idx],
40  "add_cutting_plane: CP index %u is not free\n", free_idx)
41 
42  LIBBMRM_MEMCPY(A+free_idx*dim, cp_data, dim*sizeof(float64_t));
43  map[free_idx]=false;
44 
45  bmrm_ll *cp=(bmrm_ll*)LIBBMRM_CALLOC(1, bmrm_ll);
46 
47  if (cp==NULL)
48  {
49  SG_SERROR("Out of memory.\n")
50  return;
51  }
52 
53  cp->address=A+(free_idx*dim);
54  cp->prev=*tail;
55  cp->next=NULL;
56  cp->idx=free_idx;
57  (*tail)->next=cp;
58  *tail=cp;
59 }
60 
61 void remove_cutting_plane(
62  bmrm_ll** head,
63  bmrm_ll** tail,
64  bool* map,
65  float64_t* icp)
66 {
67  bmrm_ll *cp_list_ptr=*head;
68 
69  while(cp_list_ptr->address != icp)
70  {
71  cp_list_ptr=cp_list_ptr->next;
72  }
73 
74  if (cp_list_ptr==*head)
75  {
76  *head=(*head)->next;
77  cp_list_ptr->next->prev=NULL;
78  }
79  else if (cp_list_ptr==*tail)
80  {
81  *tail=(*tail)->prev;
82  cp_list_ptr->prev->next=NULL;
83  }
84  else
85  {
86  cp_list_ptr->prev->next=cp_list_ptr->next;
87  cp_list_ptr->next->prev=cp_list_ptr->prev;
88  }
89 
90  map[cp_list_ptr->idx]=true;
91  LIBBMRM_FREE(cp_list_ptr);
92 }
93 
94 void clean_icp(ICP_stats* icp_stats,
95  BmrmStatistics& bmrm,
96  bmrm_ll** head,
97  bmrm_ll** tail,
98  float64_t*& Hmat,
99  float64_t*& diag_H,
100  float64_t*& beta,
101  bool*& map,
102  uint32_t cleanAfter,
103  float64_t*& b,
104  uint32_t*& I,
105  uint32_t cp_models
106  )
107 {
108  /* find ICP */
109  uint32_t cntICP=0;
110  uint32_t cntACP=0;
111  bmrm_ll* cp_ptr=*head;
112  uint32_t tmp_idx=0;
113 
114  while (cp_ptr != *tail)
115  {
116  if (icp_stats->ICPcounter[tmp_idx++]>=cleanAfter)
117  {
118  icp_stats->ICPs[cntICP++]=cp_ptr->address;
119  }
120  else
121  {
122  icp_stats->ACPs[cntACP++]=tmp_idx-1;
123  }
124 
125  cp_ptr=cp_ptr->next;
126  }
127 
128  /* do ICP removal */
129  if (cntICP > 0)
130  {
131  uint32_t nCP_new=bmrm.nCP-cntICP;
132 
133  for (uint32_t i=0; i<cntICP; ++i)
134  {
135  tmp_idx=0;
136  cp_ptr=*head;
137 
138  while(cp_ptr->address != icp_stats->ICPs[i])
139  {
140  cp_ptr=cp_ptr->next;
141  tmp_idx++;
142  }
143 
144  remove_cutting_plane(head, tail, map, icp_stats->ICPs[i]);
145 
146  LIBBMRM_MEMMOVE(b+tmp_idx, b+tmp_idx+1,
147  (bmrm.nCP+cp_models-tmp_idx)*sizeof(float64_t));
148  LIBBMRM_MEMMOVE(beta+tmp_idx, beta+tmp_idx+1,
149  (bmrm.nCP-tmp_idx)*sizeof(float64_t));
150  LIBBMRM_MEMMOVE(diag_H+tmp_idx, diag_H+tmp_idx+1,
151  (bmrm.nCP-tmp_idx)*sizeof(float64_t));
152  LIBBMRM_MEMMOVE(I+tmp_idx, I+tmp_idx+1,
153  (bmrm.nCP-tmp_idx)*sizeof(uint32_t));
154  LIBBMRM_MEMMOVE(icp_stats->ICPcounter+tmp_idx, icp_stats->ICPcounter+tmp_idx+1,
155  (bmrm.nCP-tmp_idx)*sizeof(uint32_t));
156  }
157 
158  /* H */
159  for (uint32_t i=0; i < nCP_new; ++i)
160  {
161  for (uint32_t j=0; j < nCP_new; ++j)
162  {
163  icp_stats->H_buff[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)]=
164  Hmat[LIBBMRM_INDEX(icp_stats->ACPs[i], icp_stats->ACPs[j], icp_stats->maxCPs)];
165  }
166  }
167 
168  for (uint32_t i=0; i<nCP_new; ++i)
169  for (uint32_t j=0; j<nCP_new; ++j)
170  Hmat[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)]=
171  icp_stats->H_buff[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)];
172 
173  bmrm.nCP=nCP_new;
174  ASSERT(bmrm.nCP<BufSize);
175  }
176 }
177 
178 /*----------------------------------------------------------------------
179  Returns pointer at i-th column of Hessian matrix.
180  ----------------------------------------------------------------------*/
181 static const float64_t *get_col( uint32_t i)
182 {
183  return( &H[ BufSize*i ] );
184 }
185 
186 BmrmStatistics svm_bmrm_solver(
187  CDualLibQPBMSOSVM *machine,
188  float64_t* W,
189  float64_t TolRel,
190  float64_t TolAbs,
191  float64_t _lambda,
192  uint32_t _BufSize,
193  bool cleanICP,
194  uint32_t cleanAfter,
195  float64_t K,
196  uint32_t Tmax,
197  bool store_train_info)
198 {
199  BmrmStatistics bmrm;
200  libqp_state_T qp_exitflag={0, 0, 0, 0};
201  float64_t *b, *beta, *diag_H, *prevW;
202  float64_t R, *subgrad, *A, QPSolverTolRel, C=1.0, wdist=0.0;
203  floatmax_t rsum, sq_norm_W, sq_norm_Wdiff=0.0;
204  uint32_t *I;
205  uint8_t S=1;
206  CStructuredModel* model=machine->get_model();
207  uint32_t nDim=model->get_dim();
208  CSOSVMHelper* helper = NULL;
209 
210  CTime ttime;
211  float64_t tstart, tstop;
212 
213  bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_ptr2, *cp_list=NULL;
214  float64_t *A_1=NULL, *A_2=NULL;
215  bool *map=NULL;
216 
217 
218  tstart=ttime.cur_time_diff(false);
219 
220  BufSize=_BufSize;
221  QPSolverTolRel=1e-9;
222 
223  uint32_t histSize = BufSize;
224  H=NULL;
225  b=NULL;
226  beta=NULL;
227  A=NULL;
228  subgrad=NULL;
229  diag_H=NULL;
230  I=NULL;
231  prevW=NULL;
232 
233 
234  H= (float64_t*) LIBBMRM_CALLOC(BufSize*BufSize, float64_t);
235 
236  if (H==NULL)
237  {
238  bmrm.exitflag=-2;
239  goto cleanup;
240  }
241 
242  ASSERT(nDim > 0);
243  ASSERT(BufSize > 0);
244  REQUIRE(BufSize < (std::numeric_limits<size_t>::max() / nDim),
245  "overflow: %u * %u > %u -- biggest possible BufSize=%u or nDim=%u\n",
246  BufSize, nDim, std::numeric_limits<size_t>::max(),
248  (std::numeric_limits<size_t>::max() / BufSize));
249 
250  A= (float64_t*) LIBBMRM_CALLOC(size_t(nDim)*size_t(BufSize), float64_t);
251 
252  if (A==NULL)
253  {
254  bmrm.exitflag=-2;
255  goto cleanup;
256  }
257 
258  b= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
259 
260  if (b==NULL)
261  {
262  bmrm.exitflag=-2;
263  goto cleanup;
264  }
265 
266  beta= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
267 
268  if (beta==NULL)
269  {
270  bmrm.exitflag=-2;
271  goto cleanup;
272  }
273 
274  subgrad= (float64_t*) LIBBMRM_CALLOC(nDim, float64_t);
275 
276  if (subgrad==NULL)
277  {
278  bmrm.exitflag=-2;
279  goto cleanup;
280  }
281 
282  diag_H= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
283 
284  if (diag_H==NULL)
285  {
286  bmrm.exitflag=-2;
287  goto cleanup;
288  }
289 
290  I= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
291 
292  if (I==NULL)
293  {
294  bmrm.exitflag=-2;
295  goto cleanup;
296  }
297 
298  ICP_stats icp_stats;
299  icp_stats.maxCPs = BufSize;
300 
301  icp_stats.ICPcounter= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
302  if (icp_stats.ICPcounter==NULL)
303  {
304  bmrm.exitflag=-2;
305  goto cleanup;
306  }
307 
308  icp_stats.ICPs= (float64_t**) LIBBMRM_CALLOC(BufSize, float64_t*);
309  if (icp_stats.ICPs==NULL)
310  {
311  bmrm.exitflag=-2;
312  goto cleanup;
313  }
314 
315  icp_stats.ACPs= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
316  if (icp_stats.ACPs==NULL)
317  {
318  bmrm.exitflag=-2;
319  goto cleanup;
320  }
321 
322  /* Temporary buffers for ICP removal */
323  icp_stats.H_buff= (float64_t*) LIBBMRM_CALLOC(BufSize*BufSize, float64_t);
324  if (icp_stats.H_buff==NULL)
325  {
326  bmrm.exitflag=-2;
327  goto cleanup;
328  }
329 
330  map= (bool*) LIBBMRM_CALLOC(BufSize, bool);
331 
332  if (map==NULL)
333  {
334  bmrm.exitflag=-2;
335  goto cleanup;
336  }
337 
338  memset( (bool*) map, true, BufSize);
339 
340  cp_list= (bmrm_ll*) LIBBMRM_CALLOC(1, bmrm_ll);
341 
342  if (cp_list==NULL)
343  {
344  bmrm.exitflag=-2;
345  goto cleanup;
346  }
347 
348  prevW= (float64_t*) LIBBMRM_CALLOC(nDim, float64_t);
349 
350  if (prevW==NULL)
351  {
352  bmrm.exitflag=-2;
353  goto cleanup;
354  }
355 
356  bmrm.hist_Fp = SGVector< float64_t >(histSize);
357  bmrm.hist_Fd = SGVector< float64_t >(histSize);
358  bmrm.hist_wdist = SGVector< float64_t >(histSize);
359 
360  /* Iinitial solution */
361  R=machine->risk(subgrad, W);
362 
363  bmrm.nCP=0;
364  bmrm.nIter=0;
365  bmrm.exitflag=0;
366 
367  b[0]=-R;
368 
369  /* Cutting plane auxiliary double linked list */
370 
371  LIBBMRM_MEMCPY(A, subgrad, nDim*sizeof(float64_t));
372  map[0]=false;
373  cp_list->address=&A[0];
374  cp_list->idx=0;
375  cp_list->prev=NULL;
376  cp_list->next=NULL;
377  CPList_head=cp_list;
378  CPList_tail=cp_list;
379 
380  /* Compute initial value of Fp, Fd, assuming that W is zero vector */
381 
382  sq_norm_W=0;
383  bmrm.Fp=R+0.5*_lambda*sq_norm_W;
384  bmrm.Fd=-LIBBMRM_PLUS_INF;
385 
386  tstop=ttime.cur_time_diff(false);
387 
388  /* Verbose output */
389  SG_SINFO("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
390  bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, R);
391 
392  /* store Fp, Fd and wdist history */
393  bmrm.hist_Fp[0]=bmrm.Fp;
394  bmrm.hist_Fd[0]=bmrm.Fd;
395  bmrm.hist_wdist[0]=0.0;
396 
397  if (store_train_info)
398  helper = machine->get_helper();
399 
400  /* main loop */
401  ASSERT(bmrm.nCP<BufSize);
402  while (bmrm.exitflag==0)
403  {
404  tstart=ttime.cur_time_diff(false);
405  bmrm.nIter++;
406 
407  /* Update H */
408 
409  if (bmrm.nCP>0)
410  {
411  A_2=get_cutting_plane(CPList_tail);
412  cp_ptr=CPList_head;
413 
414  for (uint32_t i=0; i<bmrm.nCP; ++i)
415  {
416  A_1=get_cutting_plane(cp_ptr);
417  cp_ptr=cp_ptr->next;
418  rsum= CMath::dot(A_1, A_2, nDim);
419 
420  H[LIBBMRM_INDEX(bmrm.nCP, i, BufSize)]
421  = H[LIBBMRM_INDEX(i, bmrm.nCP, BufSize)]
422  = rsum/_lambda;
423  }
424  }
425 
426  A_2=get_cutting_plane(CPList_tail);
427  rsum = CMath::dot(A_2, A_2, nDim);
428 
429  H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)]=rsum/_lambda;
430 
431  diag_H[bmrm.nCP]=H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)];
432  I[bmrm.nCP]=1;
433 
434  beta[bmrm.nCP]=0.0; // [beta; 0]
435  bmrm.nCP++;
436  ASSERT(bmrm.nCP<BufSize);
437 
438 #if 0
439  /* TODO: scaling...*/
440  float64_t scale = CMath::max(diag_H, BufSize)/(1000.0*_lambda);
441  SGVector<float64_t> sb(bmrm.nCP);
442  sb.zero();
443  sb.vec1_plus_scalar_times_vec2(sb.vector, 1/scale, b, bmrm.nCP);
444 
445  SGVector<float64_t> sh(bmrm.nCP);
446  sh.zero();
447  sb.vec1_plus_scalar_times_vec2(sh.vector, 1/scale, diag_H, bmrm.nCP);
448 
449  qp_exitflag =
450  libqp_splx_solver(&get_col, sh.vector, sb.vector, &C, I, &S, beta,
451  bmrm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
452 #else
453  /* call QP solver */
454  qp_exitflag=libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, beta,
455  bmrm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
456 #endif
457 
458  bmrm.qp_exitflag=qp_exitflag.exitflag;
459 
460  /* Update ICPcounter (add one to unused and reset used)
461  * + compute number of active CPs */
462  bmrm.nzA=0;
463 
464  for (uint32_t aaa=0; aaa<bmrm.nCP; ++aaa)
465  {
466  if (beta[aaa]>epsilon)
467  {
468  ++bmrm.nzA;
469  icp_stats.ICPcounter[aaa]=0;
470  }
471  else
472  {
473  icp_stats.ICPcounter[aaa]+=1;
474  }
475  }
476 
477  /* W update */
478  memset(W, 0, sizeof(float64_t)*nDim);
479  cp_ptr=CPList_head;
480  for (uint32_t j=0; j<bmrm.nCP; ++j)
481  {
482  A_1=get_cutting_plane(cp_ptr);
483  cp_ptr=cp_ptr->next;
484  SGVector<float64_t>::vec1_plus_scalar_times_vec2(W, -beta[j]/_lambda, A_1, nDim);
485  }
486 
487  /* risk and subgradient computation */
488  R = machine->risk(subgrad, W);
489  add_cutting_plane(&CPList_tail, map, A,
490  find_free_idx(map, BufSize), subgrad, nDim);
491 
492  sq_norm_W=CMath::dot(W, W, nDim);
493  b[bmrm.nCP]=CMath::dot(subgrad, W, nDim) - R;
494 
495  sq_norm_Wdiff=0.0;
496  for (uint32_t j=0; j<nDim; ++j)
497  {
498  sq_norm_Wdiff+=(W[j]-prevW[j])*(W[j]-prevW[j]);
499  }
500 
501  bmrm.Fp=R+0.5*_lambda*sq_norm_W;
502  bmrm.Fd=-qp_exitflag.QP;
503  wdist=CMath::sqrt(sq_norm_Wdiff);
504 
505  /* Stopping conditions */
506  if (bmrm.Fp - bmrm.Fd <= TolRel*LIBBMRM_ABS(bmrm.Fp))
507  bmrm.exitflag=1;
508 
509  if (bmrm.Fp - bmrm.Fd <= TolAbs)
510  bmrm.exitflag=2;
511 
512  tstop=ttime.cur_time_diff(false);
513 
514  /* Verbose output */
515  SG_SINFO("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d\n",
516  bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, bmrm.Fp-bmrm.Fd,
517  (bmrm.Fp-bmrm.Fd)/bmrm.Fp, R, bmrm.nCP, bmrm.nzA, qp_exitflag.exitflag);
518 
519  // iteration exceeds histSize
520  if (bmrm.nIter >= histSize)
521  {
522  histSize += BufSize;
523  bmrm.hist_Fp.resize_vector(histSize);
524  bmrm.hist_Fd.resize_vector(histSize);
525  bmrm.hist_wdist.resize_vector(histSize);
526  }
527 
528  /* Keep Fp, Fd and w_dist history */
529  ASSERT(bmrm.nIter < histSize);
530  bmrm.hist_Fp[bmrm.nIter]=bmrm.Fp;
531  bmrm.hist_Fd[bmrm.nIter]=bmrm.Fd;
532  bmrm.hist_wdist[bmrm.nIter]=wdist;
533 
534  /* keep W (for wdist history track) */
535  LIBBMRM_MEMCPY(prevW, W, nDim*sizeof(float64_t));
536 
537  /* Inactive Cutting Planes (ICP) removal */
538  if (cleanICP)
539  {
540  clean_icp(&icp_stats, bmrm, &CPList_head, &CPList_tail, H, diag_H, beta, map, cleanAfter, b, I);
541  ASSERT(bmrm.nCP<BufSize);
542  }
543 
544  // next CP would exceed BufSize
545  if (bmrm.nCP+1 >= BufSize)
546  bmrm.exitflag=-1;
547 
548  /* Debug: compute objective and training error */
549  if (store_train_info)
550  {
551  float64_t info_tstart=ttime.cur_time_diff(false);
552 
553  SGVector<float64_t> w_info(W, nDim, false);
554  float64_t primal = CSOSVMHelper::primal_objective(w_info, model, _lambda);
555  float64_t train_error = CSOSVMHelper::average_loss(w_info, model);
556  helper->add_debug_info(primal, bmrm.nIter, train_error);
557 
558  float64_t info_tstop=ttime.cur_time_diff(false);
559 
560  SG_SINFO("On iteration %4d, tim=%.3lf, primal=%.3lf, train_error=%lf\n", bmrm.nIter, info_tstop-info_tstart, primal, train_error);
561  }
562 
563  } /* end of main loop */
564 
565  if (store_train_info)
566  {
567  helper->terminate();
568  SG_UNREF(helper);
569  }
570 
571  ASSERT(bmrm.nIter+1 <= histSize);
572  bmrm.hist_Fp.resize_vector(bmrm.nIter+1);
573  bmrm.hist_Fd.resize_vector(bmrm.nIter+1);
574  bmrm.hist_wdist.resize_vector(bmrm.nIter+1);
575 
576  cp_ptr=CPList_head;
577 
578  while(cp_ptr!=NULL)
579  {
580  cp_ptr2=cp_ptr;
581  cp_ptr=cp_ptr->next;
582  LIBBMRM_FREE(cp_ptr2);
583  cp_ptr2=NULL;
584  }
585 
586  cp_list=NULL;
587 
588 cleanup:
589 
590  LIBBMRM_FREE(H);
591  LIBBMRM_FREE(b);
592  LIBBMRM_FREE(beta);
593  LIBBMRM_FREE(A);
594  LIBBMRM_FREE(subgrad);
595  LIBBMRM_FREE(diag_H);
596  LIBBMRM_FREE(I);
597  LIBBMRM_FREE(icp_stats.ICPcounter);
598  LIBBMRM_FREE(icp_stats.ICPs);
599  LIBBMRM_FREE(icp_stats.ACPs);
600  LIBBMRM_FREE(icp_stats.H_buff);
601  LIBBMRM_FREE(map);
602  LIBBMRM_FREE(prevW);
603 
604  if (cp_list)
605  LIBBMRM_FREE(cp_list);
606 
607  SG_UNREF(model);
608 
609  return(bmrm);
610 }
611 }
612 #endif //USE_GPL_SHOGUN
#define REQUIRE(x,...)
Definition: SGIO.h:206
static float64_t primal_objective(SGVector< float64_t > w, CStructuredModel *model, float64_t lbda)
Definition: SOSVMHelper.cpp:57
#define ASSERT(x)
Definition: SGIO.h:201
double float64_t
Definition: common.h:50
long double floatmax_t
Definition: common.h:51
static T max(T a, T b)
Definition: Math.h:168
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
static float64_t average_loss(SGVector< float64_t > w, CStructuredModel *model, bool is_ub=false)
Definition: SOSVMHelper.cpp:88
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
Definition: SGVector.cpp:529
#define SG_UNREF(x)
Definition: SGObject.h:55
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define SG_SERROR(...)
Definition: SGIO.h:179
void scale(Matrix A, Matrix B, typename Matrix::Scalar alpha)
Definition: Core.h:94
#define SG_SINFO(...)
Definition: SGIO.h:173
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68
static float32_t sqrt(float32_t x)
Definition: Math.h:459

SHOGUN Machine Learning Toolbox - Documentation