SHOGUN  4.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
libbmrm.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * libbmrm.h: Implementation of the BMRM solver for SO training
8  *
9  * Copyright (C) 2012 Michal Uricar, uricamic@cmp.felk.cvut.cz
10  *
11  * Implementation of the BMRM solver
12  *--------------------------------------------------------------------- */
13 
15 #include <shogun/lib/external/libqp.h>
16 #include <shogun/lib/Time.h>
18 
19 #include <climits>
20 #include <limits>
21 
22 namespace shogun
23 {
24 static const uint32_t QPSolverMaxIter=0xFFFFFFFF;
25 static const float64_t epsilon=0.0;
26 
27 static float64_t *H;
28 uint32_t BufSize;
29 
31  bmrm_ll** tail,
32  bool* map,
33  float64_t* A,
34  uint32_t free_idx,
35  float64_t* cp_data,
36  uint32_t dim)
37 {
38  REQUIRE(map[free_idx],
39  "add_cutting_plane: CP index %u is not free\n", free_idx)
40 
41  LIBBMRM_MEMCPY(A+free_idx*dim, cp_data, dim*sizeof(float64_t));
42  map[free_idx]=false;
43 
45 
46  if (cp==NULL)
47  {
48  SG_SERROR("Out of memory.\n")
49  return;
50  }
51 
52  cp->address=A+(free_idx*dim);
53  cp->prev=*tail;
54  cp->next=NULL;
55  cp->idx=free_idx;
56  (*tail)->next=cp;
57  *tail=cp;
58 }
59 
61  bmrm_ll** head,
62  bmrm_ll** tail,
63  bool* map,
64  float64_t* icp)
65 {
66  bmrm_ll *cp_list_ptr=*head;
67 
68  while(cp_list_ptr->address != icp)
69  {
70  cp_list_ptr=cp_list_ptr->next;
71  }
72 
73  if (cp_list_ptr==*head)
74  {
75  *head=(*head)->next;
76  cp_list_ptr->next->prev=NULL;
77  }
78  else if (cp_list_ptr==*tail)
79  {
80  *tail=(*tail)->prev;
81  cp_list_ptr->prev->next=NULL;
82  }
83  else
84  {
85  cp_list_ptr->prev->next=cp_list_ptr->next;
86  cp_list_ptr->next->prev=cp_list_ptr->prev;
87  }
88 
89  map[cp_list_ptr->idx]=true;
90  LIBBMRM_FREE(cp_list_ptr);
91 }
92 
93 void clean_icp(ICP_stats* icp_stats,
94  BmrmStatistics& bmrm,
95  bmrm_ll** head,
96  bmrm_ll** tail,
97  float64_t*& Hmat,
98  float64_t*& diag_H,
99  float64_t*& beta,
100  bool*& map,
101  uint32_t cleanAfter,
102  float64_t*& b,
103  uint32_t*& I,
104  uint32_t cp_models
105  )
106 {
107  /* find ICP */
108  uint32_t cntICP=0;
109  uint32_t cntACP=0;
110  bmrm_ll* cp_ptr=*head;
111  uint32_t tmp_idx=0;
112 
113  while (cp_ptr != *tail)
114  {
115  if (icp_stats->ICPcounter[tmp_idx++]>=cleanAfter)
116  {
117  icp_stats->ICPs[cntICP++]=cp_ptr->address;
118  }
119  else
120  {
121  icp_stats->ACPs[cntACP++]=tmp_idx-1;
122  }
123 
124  cp_ptr=cp_ptr->next;
125  }
126 
127  /* do ICP removal */
128  if (cntICP > 0)
129  {
130  uint32_t nCP_new=bmrm.nCP-cntICP;
131 
132  for (uint32_t i=0; i<cntICP; ++i)
133  {
134  tmp_idx=0;
135  cp_ptr=*head;
136 
137  while(cp_ptr->address != icp_stats->ICPs[i])
138  {
139  cp_ptr=cp_ptr->next;
140  tmp_idx++;
141  }
142 
143  remove_cutting_plane(head, tail, map, icp_stats->ICPs[i]);
144 
145  LIBBMRM_MEMMOVE(b+tmp_idx, b+tmp_idx+1,
146  (bmrm.nCP+cp_models-tmp_idx)*sizeof(float64_t));
147  LIBBMRM_MEMMOVE(beta+tmp_idx, beta+tmp_idx+1,
148  (bmrm.nCP-tmp_idx)*sizeof(float64_t));
149  LIBBMRM_MEMMOVE(diag_H+tmp_idx, diag_H+tmp_idx+1,
150  (bmrm.nCP-tmp_idx)*sizeof(float64_t));
151  LIBBMRM_MEMMOVE(I+tmp_idx, I+tmp_idx+1,
152  (bmrm.nCP-tmp_idx)*sizeof(uint32_t));
153  LIBBMRM_MEMMOVE(icp_stats->ICPcounter+tmp_idx, icp_stats->ICPcounter+tmp_idx+1,
154  (bmrm.nCP-tmp_idx)*sizeof(uint32_t));
155  }
156 
157  /* H */
158  for (uint32_t i=0; i < nCP_new; ++i)
159  {
160  for (uint32_t j=0; j < nCP_new; ++j)
161  {
162  icp_stats->H_buff[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)]=
163  Hmat[LIBBMRM_INDEX(icp_stats->ACPs[i], icp_stats->ACPs[j], icp_stats->maxCPs)];
164  }
165  }
166 
167  for (uint32_t i=0; i<nCP_new; ++i)
168  for (uint32_t j=0; j<nCP_new; ++j)
169  Hmat[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)]=
170  icp_stats->H_buff[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)];
171 
172  bmrm.nCP=nCP_new;
173  ASSERT(bmrm.nCP<BufSize);
174  }
175 }
176 
177 /*----------------------------------------------------------------------
178  Returns pointer at i-th column of Hessian matrix.
179  ----------------------------------------------------------------------*/
180 static const float64_t *get_col( uint32_t i)
181 {
182  return( &H[ BufSize*i ] );
183 }
184 
186  CDualLibQPBMSOSVM *machine,
187  float64_t* W,
188  float64_t TolRel,
189  float64_t TolAbs,
190  float64_t _lambda,
191  uint32_t _BufSize,
192  bool cleanICP,
193  uint32_t cleanAfter,
194  float64_t K,
195  uint32_t Tmax,
196  bool store_train_info)
197 {
198  BmrmStatistics bmrm;
199  libqp_state_T qp_exitflag={0, 0, 0, 0};
200  float64_t *b, *beta, *diag_H, *prevW;
201  float64_t R, *subgrad, *A, QPSolverTolRel, C=1.0, wdist=0.0;
202  floatmax_t rsum, sq_norm_W, sq_norm_Wdiff=0.0;
203  uint32_t *I;
204  uint8_t S=1;
205  CStructuredModel* model=machine->get_model();
206  uint32_t nDim=model->get_dim();
207  CSOSVMHelper* helper = NULL;
208 
209  CTime ttime;
210  float64_t tstart, tstop;
211 
212  bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_ptr2, *cp_list=NULL;
213  float64_t *A_1=NULL, *A_2=NULL;
214  bool *map=NULL;
215 
216 
217  tstart=ttime.cur_time_diff(false);
218 
219  BufSize=_BufSize;
220  QPSolverTolRel=1e-9;
221 
222  uint32_t histSize = BufSize;
223  H=NULL;
224  b=NULL;
225  beta=NULL;
226  A=NULL;
227  subgrad=NULL;
228  diag_H=NULL;
229  I=NULL;
230  prevW=NULL;
231 
232 
234 
235  if (H==NULL)
236  {
237  bmrm.exitflag=-2;
238  goto cleanup;
239  }
240 
241  ASSERT(nDim > 0);
242  ASSERT(BufSize > 0);
243  REQUIRE(BufSize < (std::numeric_limits<size_t>::max() / nDim),
244  "overflow: %u * %u > %u -- biggest possible BufSize=%u or nDim=%u\n",
245  BufSize, nDim, std::numeric_limits<size_t>::max(),
247  (std::numeric_limits<size_t>::max() / BufSize));
248 
249  A= (float64_t*) LIBBMRM_CALLOC(size_t(nDim)*size_t(BufSize), float64_t);
250 
251  if (A==NULL)
252  {
253  bmrm.exitflag=-2;
254  goto cleanup;
255  }
256 
257  b= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
258 
259  if (b==NULL)
260  {
261  bmrm.exitflag=-2;
262  goto cleanup;
263  }
264 
265  beta= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
266 
267  if (beta==NULL)
268  {
269  bmrm.exitflag=-2;
270  goto cleanup;
271  }
272 
273  subgrad= (float64_t*) LIBBMRM_CALLOC(nDim, float64_t);
274 
275  if (subgrad==NULL)
276  {
277  bmrm.exitflag=-2;
278  goto cleanup;
279  }
280 
281  diag_H= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
282 
283  if (diag_H==NULL)
284  {
285  bmrm.exitflag=-2;
286  goto cleanup;
287  }
288 
289  I= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
290 
291  if (I==NULL)
292  {
293  bmrm.exitflag=-2;
294  goto cleanup;
295  }
296 
297  ICP_stats icp_stats;
298  icp_stats.maxCPs = BufSize;
299 
300  icp_stats.ICPcounter= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
301  if (icp_stats.ICPcounter==NULL)
302  {
303  bmrm.exitflag=-2;
304  goto cleanup;
305  }
306 
307  icp_stats.ICPs= (float64_t**) LIBBMRM_CALLOC(BufSize, float64_t*);
308  if (icp_stats.ICPs==NULL)
309  {
310  bmrm.exitflag=-2;
311  goto cleanup;
312  }
313 
314  icp_stats.ACPs= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
315  if (icp_stats.ACPs==NULL)
316  {
317  bmrm.exitflag=-2;
318  goto cleanup;
319  }
320 
321  /* Temporary buffers for ICP removal */
322  icp_stats.H_buff= (float64_t*) LIBBMRM_CALLOC(BufSize*BufSize, float64_t);
323  if (icp_stats.H_buff==NULL)
324  {
325  bmrm.exitflag=-2;
326  goto cleanup;
327  }
328 
329  map= (bool*) LIBBMRM_CALLOC(BufSize, bool);
330 
331  if (map==NULL)
332  {
333  bmrm.exitflag=-2;
334  goto cleanup;
335  }
336 
337  memset( (bool*) map, true, BufSize);
338 
339  cp_list= (bmrm_ll*) LIBBMRM_CALLOC(1, bmrm_ll);
340 
341  if (cp_list==NULL)
342  {
343  bmrm.exitflag=-2;
344  goto cleanup;
345  }
346 
347  prevW= (float64_t*) LIBBMRM_CALLOC(nDim, float64_t);
348 
349  if (prevW==NULL)
350  {
351  bmrm.exitflag=-2;
352  goto cleanup;
353  }
354 
355  bmrm.hist_Fp = SGVector< float64_t >(histSize);
356  bmrm.hist_Fd = SGVector< float64_t >(histSize);
357  bmrm.hist_wdist = SGVector< float64_t >(histSize);
358 
359  /* Iinitial solution */
360  R=machine->risk(subgrad, W);
361 
362  bmrm.nCP=0;
363  bmrm.nIter=0;
364  bmrm.exitflag=0;
365 
366  b[0]=-R;
367 
368  /* Cutting plane auxiliary double linked list */
369 
370  LIBBMRM_MEMCPY(A, subgrad, nDim*sizeof(float64_t));
371  map[0]=false;
372  cp_list->address=&A[0];
373  cp_list->idx=0;
374  cp_list->prev=NULL;
375  cp_list->next=NULL;
376  CPList_head=cp_list;
377  CPList_tail=cp_list;
378 
379  /* Compute initial value of Fp, Fd, assuming that W is zero vector */
380 
381  sq_norm_W=0;
382  bmrm.Fp=R+0.5*_lambda*sq_norm_W;
383  bmrm.Fd=-LIBBMRM_PLUS_INF;
384 
385  tstop=ttime.cur_time_diff(false);
386 
387  /* Verbose output */
388  SG_SINFO("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
389  bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, R);
390 
391  /* store Fp, Fd and wdist history */
392  bmrm.hist_Fp[0]=bmrm.Fp;
393  bmrm.hist_Fd[0]=bmrm.Fd;
394  bmrm.hist_wdist[0]=0.0;
395 
396  if (store_train_info)
397  helper = machine->get_helper();
398 
399  /* main loop */
400  ASSERT(bmrm.nCP<BufSize);
401  while (bmrm.exitflag==0)
402  {
403  tstart=ttime.cur_time_diff(false);
404  bmrm.nIter++;
405 
406  /* Update H */
407 
408  if (bmrm.nCP>0)
409  {
410  A_2=get_cutting_plane(CPList_tail);
411  cp_ptr=CPList_head;
412 
413  for (uint32_t i=0; i<bmrm.nCP; ++i)
414  {
415  A_1=get_cutting_plane(cp_ptr);
416  cp_ptr=cp_ptr->next;
417  rsum= CMath::dot(A_1, A_2, nDim);
418 
419  H[LIBBMRM_INDEX(bmrm.nCP, i, BufSize)]
420  = H[LIBBMRM_INDEX(i, bmrm.nCP, BufSize)]
421  = rsum/_lambda;
422  }
423  }
424 
425  A_2=get_cutting_plane(CPList_tail);
426  rsum = CMath::dot(A_2, A_2, nDim);
427 
428  H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)]=rsum/_lambda;
429 
430  diag_H[bmrm.nCP]=H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)];
431  I[bmrm.nCP]=1;
432 
433  beta[bmrm.nCP]=0.0; // [beta; 0]
434  bmrm.nCP++;
435  ASSERT(bmrm.nCP<BufSize);
436 
437 #if 0
438  /* TODO: scaling...*/
439  float64_t scale = CMath::max(diag_H, BufSize)/(1000.0*_lambda);
440  SGVector<float64_t> sb(bmrm.nCP);
441  sb.zero();
442  sb.vec1_plus_scalar_times_vec2(sb.vector, 1/scale, b, bmrm.nCP);
443 
444  SGVector<float64_t> sh(bmrm.nCP);
445  sh.zero();
446  sb.vec1_plus_scalar_times_vec2(sh.vector, 1/scale, diag_H, bmrm.nCP);
447 
448  qp_exitflag =
449  libqp_splx_solver(&get_col, sh.vector, sb.vector, &C, I, &S, beta,
450  bmrm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
451 #else
452  /* call QP solver */
453  qp_exitflag=libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, beta,
454  bmrm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
455 #endif
456 
457  bmrm.qp_exitflag=qp_exitflag.exitflag;
458 
459  /* Update ICPcounter (add one to unused and reset used)
460  * + compute number of active CPs */
461  bmrm.nzA=0;
462 
463  for (uint32_t aaa=0; aaa<bmrm.nCP; ++aaa)
464  {
465  if (beta[aaa]>epsilon)
466  {
467  ++bmrm.nzA;
468  icp_stats.ICPcounter[aaa]=0;
469  }
470  else
471  {
472  icp_stats.ICPcounter[aaa]+=1;
473  }
474  }
475 
476  /* W update */
477  memset(W, 0, sizeof(float64_t)*nDim);
478  cp_ptr=CPList_head;
479  for (uint32_t j=0; j<bmrm.nCP; ++j)
480  {
481  A_1=get_cutting_plane(cp_ptr);
482  cp_ptr=cp_ptr->next;
483  SGVector<float64_t>::vec1_plus_scalar_times_vec2(W, -beta[j]/_lambda, A_1, nDim);
484  }
485 
486  /* risk and subgradient computation */
487  R = machine->risk(subgrad, W);
488  add_cutting_plane(&CPList_tail, map, A,
489  find_free_idx(map, BufSize), subgrad, nDim);
490 
491  sq_norm_W=CMath::dot(W, W, nDim);
492  b[bmrm.nCP]=CMath::dot(subgrad, W, nDim) - R;
493 
494  sq_norm_Wdiff=0.0;
495  for (uint32_t j=0; j<nDim; ++j)
496  {
497  sq_norm_Wdiff+=(W[j]-prevW[j])*(W[j]-prevW[j]);
498  }
499 
500  bmrm.Fp=R+0.5*_lambda*sq_norm_W;
501  bmrm.Fd=-qp_exitflag.QP;
502  wdist=CMath::sqrt(sq_norm_Wdiff);
503 
504  /* Stopping conditions */
505  if (bmrm.Fp - bmrm.Fd <= TolRel*LIBBMRM_ABS(bmrm.Fp))
506  bmrm.exitflag=1;
507 
508  if (bmrm.Fp - bmrm.Fd <= TolAbs)
509  bmrm.exitflag=2;
510 
511  tstop=ttime.cur_time_diff(false);
512 
513  /* Verbose output */
514  SG_SINFO("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d\n",
515  bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, bmrm.Fp-bmrm.Fd,
516  (bmrm.Fp-bmrm.Fd)/bmrm.Fp, R, bmrm.nCP, bmrm.nzA, qp_exitflag.exitflag);
517 
518  // iteration exceeds histSize
519  if (bmrm.nIter >= histSize)
520  {
521  histSize += BufSize;
522  bmrm.hist_Fp.resize_vector(histSize);
523  bmrm.hist_Fd.resize_vector(histSize);
524  bmrm.hist_wdist.resize_vector(histSize);
525  }
526 
527  /* Keep Fp, Fd and w_dist history */
528  ASSERT(bmrm.nIter < histSize);
529  bmrm.hist_Fp[bmrm.nIter]=bmrm.Fp;
530  bmrm.hist_Fd[bmrm.nIter]=bmrm.Fd;
531  bmrm.hist_wdist[bmrm.nIter]=wdist;
532 
533  /* keep W (for wdist history track) */
534  LIBBMRM_MEMCPY(prevW, W, nDim*sizeof(float64_t));
535 
536  /* Inactive Cutting Planes (ICP) removal */
537  if (cleanICP)
538  {
539  clean_icp(&icp_stats, bmrm, &CPList_head, &CPList_tail, H, diag_H, beta, map, cleanAfter, b, I);
540  ASSERT(bmrm.nCP<BufSize);
541  }
542 
543  // next CP would exceed BufSize
544  if (bmrm.nCP+1 >= BufSize)
545  bmrm.exitflag=-1;
546 
547  /* Debug: compute objective and training error */
548  if (store_train_info)
549  {
550  float64_t info_tstart=ttime.cur_time_diff(false);
551 
552  SGVector<float64_t> w_info(W, nDim, false);
553  float64_t primal = CSOSVMHelper::primal_objective(w_info, model, _lambda);
554  float64_t train_error = CSOSVMHelper::average_loss(w_info, model);
555  helper->add_debug_info(primal, bmrm.nIter, train_error);
556 
557  float64_t info_tstop=ttime.cur_time_diff(false);
558 
559  SG_SINFO("On iteration %4d, tim=%.3lf, primal=%.3lf, train_error=%lf\n", bmrm.nIter, info_tstop-info_tstart, primal, train_error);
560  }
561 
562  } /* end of main loop */
563 
564  if (store_train_info)
565  {
566  helper->terminate();
567  SG_UNREF(helper);
568  }
569 
570  ASSERT(bmrm.nIter+1 <= histSize);
571  bmrm.hist_Fp.resize_vector(bmrm.nIter+1);
572  bmrm.hist_Fd.resize_vector(bmrm.nIter+1);
573  bmrm.hist_wdist.resize_vector(bmrm.nIter+1);
574 
575  cp_ptr=CPList_head;
576 
577  while(cp_ptr!=NULL)
578  {
579  cp_ptr2=cp_ptr;
580  cp_ptr=cp_ptr->next;
581  LIBBMRM_FREE(cp_ptr2);
582  cp_ptr2=NULL;
583  }
584 
585  cp_list=NULL;
586 
587 cleanup:
588 
589  LIBBMRM_FREE(H);
590  LIBBMRM_FREE(b);
591  LIBBMRM_FREE(beta);
592  LIBBMRM_FREE(A);
593  LIBBMRM_FREE(subgrad);
594  LIBBMRM_FREE(diag_H);
595  LIBBMRM_FREE(I);
596  LIBBMRM_FREE(icp_stats.ICPcounter);
597  LIBBMRM_FREE(icp_stats.ICPs);
598  LIBBMRM_FREE(icp_stats.ACPs);
599  LIBBMRM_FREE(icp_stats.H_buff);
600  LIBBMRM_FREE(map);
601  LIBBMRM_FREE(prevW);
602 
603  if (cp_list)
604  LIBBMRM_FREE(cp_list);
605 
606  SG_UNREF(model);
607 
608  return(bmrm);
609 }
610 }

SHOGUN Machine Learning Toolbox - Documentation