SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
libbmrm.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * libbmrm.h: Implementation of the BMRM solver for SO training
8  *
9  * Copyright (C) 2012 Michal Uricar, uricamic@cmp.felk.cvut.cz
10  *
11  * Implementation of the BMRM solver
12  *--------------------------------------------------------------------- */
13 
15 #include <shogun/lib/external/libqp.h>
16 #include <shogun/lib/Time.h>
17 
18 #include <climits>
19 #include <limits>
20 
21 namespace shogun
22 {
23 static const uint32_t QPSolverMaxIter=0xFFFFFFFF;
24 static const float64_t epsilon=0.0;
25 
26 static float64_t *H;
27 uint32_t BufSize;
28 
30  bmrm_ll** tail,
31  bool* map,
32  float64_t* A,
33  uint32_t free_idx,
34  float64_t* cp_data,
35  uint32_t dim)
36 {
37  REQUIRE(map[free_idx],
38  "add_cutting_plane: CP index %u is not free\n", free_idx)
39 
40  LIBBMRM_MEMCPY(A+free_idx*dim, cp_data, dim*sizeof(float64_t));
41  map[free_idx]=false;
42 
44 
45  if (cp==NULL)
46  {
47  SG_SERROR("Out of memory.\n")
48  return;
49  }
50 
51  cp->address=A+(free_idx*dim);
52  cp->prev=*tail;
53  cp->next=NULL;
54  cp->idx=free_idx;
55  (*tail)->next=cp;
56  *tail=cp;
57 }
58 
60  bmrm_ll** head,
61  bmrm_ll** tail,
62  bool* map,
63  float64_t* icp)
64 {
65  bmrm_ll *cp_list_ptr=*head;
66 
67  while(cp_list_ptr->address != icp)
68  {
69  cp_list_ptr=cp_list_ptr->next;
70  }
71 
72  if (cp_list_ptr==*head)
73  {
74  *head=(*head)->next;
75  cp_list_ptr->next->prev=NULL;
76  }
77  else if (cp_list_ptr==*tail)
78  {
79  *tail=(*tail)->prev;
80  cp_list_ptr->prev->next=NULL;
81  }
82  else
83  {
84  cp_list_ptr->prev->next=cp_list_ptr->next;
85  cp_list_ptr->next->prev=cp_list_ptr->prev;
86  }
87 
88  map[cp_list_ptr->idx]=true;
89  LIBBMRM_FREE(cp_list_ptr);
90 }
91 
92 void clean_icp(ICP_stats* icp_stats,
93  BmrmStatistics& bmrm,
94  bmrm_ll** head,
95  bmrm_ll** tail,
96  float64_t*& Hmat,
97  float64_t*& diag_H,
98  float64_t*& beta,
99  bool*& map,
100  uint32_t cleanAfter,
101  float64_t*& b,
102  uint32_t*& I,
103  uint32_t cp_models
104  )
105 {
106  /* find ICP */
107  uint32_t cntICP=0;
108  uint32_t cntACP=0;
109  bmrm_ll* cp_ptr=*head;
110  uint32_t tmp_idx=0;
111 
112  while (cp_ptr != *tail)
113  {
114  if (icp_stats->ICPcounter[tmp_idx++]>=cleanAfter)
115  {
116  icp_stats->ICPs[cntICP++]=cp_ptr->address;
117  }
118  else
119  {
120  icp_stats->ACPs[cntACP++]=tmp_idx-1;
121  }
122 
123  cp_ptr=cp_ptr->next;
124  }
125 
126  /* do ICP removal */
127  if (cntICP > 0)
128  {
129  uint32_t nCP_new=bmrm.nCP-cntICP;
130 
131  for (uint32_t i=0; i<cntICP; ++i)
132  {
133  tmp_idx=0;
134  cp_ptr=*head;
135 
136  while(cp_ptr->address != icp_stats->ICPs[i])
137  {
138  cp_ptr=cp_ptr->next;
139  tmp_idx++;
140  }
141 
142  remove_cutting_plane(head, tail, map, icp_stats->ICPs[i]);
143 
144  LIBBMRM_MEMMOVE(b+tmp_idx, b+tmp_idx+1,
145  (bmrm.nCP+cp_models-tmp_idx)*sizeof(float64_t));
146  LIBBMRM_MEMMOVE(beta+tmp_idx, beta+tmp_idx+1,
147  (bmrm.nCP-tmp_idx)*sizeof(float64_t));
148  LIBBMRM_MEMMOVE(diag_H+tmp_idx, diag_H+tmp_idx+1,
149  (bmrm.nCP-tmp_idx)*sizeof(float64_t));
150  LIBBMRM_MEMMOVE(I+tmp_idx, I+tmp_idx+1,
151  (bmrm.nCP-tmp_idx)*sizeof(uint32_t));
152  LIBBMRM_MEMMOVE(icp_stats->ICPcounter+tmp_idx, icp_stats->ICPcounter+tmp_idx+1,
153  (bmrm.nCP-tmp_idx)*sizeof(uint32_t));
154  }
155 
156  /* H */
157  for (uint32_t i=0; i < nCP_new; ++i)
158  {
159  for (uint32_t j=0; j < nCP_new; ++j)
160  {
161  icp_stats->H_buff[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)]=
162  Hmat[LIBBMRM_INDEX(icp_stats->ACPs[i], icp_stats->ACPs[j], icp_stats->maxCPs)];
163  }
164  }
165 
166  for (uint32_t i=0; i<nCP_new; ++i)
167  for (uint32_t j=0; j<nCP_new; ++j)
168  Hmat[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)]=
169  icp_stats->H_buff[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)];
170 
171  bmrm.nCP=nCP_new;
172  ASSERT(bmrm.nCP<BufSize);
173  }
174 }
175 
176 /*----------------------------------------------------------------------
177  Returns pointer at i-th column of Hessian matrix.
178  ----------------------------------------------------------------------*/
179 static const float64_t *get_col( uint32_t i)
180 {
181  return( &H[ BufSize*i ] );
182 }
183 
185  CDualLibQPBMSOSVM *machine,
186  float64_t* W,
187  float64_t TolRel,
188  float64_t TolAbs,
189  float64_t _lambda,
190  uint32_t _BufSize,
191  bool cleanICP,
192  uint32_t cleanAfter,
193  float64_t K,
194  uint32_t Tmax,
195  bool store_train_info)
196 {
197  BmrmStatistics bmrm;
198  libqp_state_T qp_exitflag={0, 0, 0, 0};
199  float64_t *b, *beta, *diag_H, *prevW;
200  float64_t R, *subgrad, *A, QPSolverTolRel, C=1.0, wdist=0.0;
201  floatmax_t rsum, sq_norm_W, sq_norm_Wdiff=0.0;
202  uint32_t *I;
203  uint8_t S=1;
204  CStructuredModel* model=machine->get_model();
205  uint32_t nDim=model->get_dim();
206  CSOSVMHelper* helper = NULL;
207 
208  CTime ttime;
209  float64_t tstart, tstop;
210 
211  bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_ptr2, *cp_list=NULL;
212  float64_t *A_1=NULL, *A_2=NULL;
213  bool *map=NULL;
214 
215 
216  tstart=ttime.cur_time_diff(false);
217 
218  BufSize=_BufSize;
219  QPSolverTolRel=1e-9;
220 
221  uint32_t histSize = BufSize;
222  H=NULL;
223  b=NULL;
224  beta=NULL;
225  A=NULL;
226  subgrad=NULL;
227  diag_H=NULL;
228  I=NULL;
229  prevW=NULL;
230 
231 
233 
234  if (H==NULL)
235  {
236  bmrm.exitflag=-2;
237  goto cleanup;
238  }
239 
240  ASSERT(nDim > 0);
241  ASSERT(BufSize > 0);
242  REQUIRE(BufSize < (std::numeric_limits<size_t>::max() / nDim),
243  "overflow: %u * %u > %u -- biggest possible BufSize=%u or nDim=%u\n",
244  BufSize, nDim, std::numeric_limits<size_t>::max(),
245  (std::numeric_limits<size_t>::max() / nDim),
246  (std::numeric_limits<size_t>::max() / BufSize));
247 
248  A= (float64_t*) LIBBMRM_CALLOC(size_t(nDim)*size_t(BufSize), float64_t);
249 
250  if (A==NULL)
251  {
252  bmrm.exitflag=-2;
253  goto cleanup;
254  }
255 
256  b= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
257 
258  if (b==NULL)
259  {
260  bmrm.exitflag=-2;
261  goto cleanup;
262  }
263 
264  beta= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
265 
266  if (beta==NULL)
267  {
268  bmrm.exitflag=-2;
269  goto cleanup;
270  }
271 
272  subgrad= (float64_t*) LIBBMRM_CALLOC(nDim, float64_t);
273 
274  if (subgrad==NULL)
275  {
276  bmrm.exitflag=-2;
277  goto cleanup;
278  }
279 
280  diag_H= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
281 
282  if (diag_H==NULL)
283  {
284  bmrm.exitflag=-2;
285  goto cleanup;
286  }
287 
288  I= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
289 
290  if (I==NULL)
291  {
292  bmrm.exitflag=-2;
293  goto cleanup;
294  }
295 
296  ICP_stats icp_stats;
297  icp_stats.maxCPs = BufSize;
298 
299  icp_stats.ICPcounter= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
300  if (icp_stats.ICPcounter==NULL)
301  {
302  bmrm.exitflag=-2;
303  goto cleanup;
304  }
305 
306  icp_stats.ICPs= (float64_t**) LIBBMRM_CALLOC(BufSize, float64_t*);
307  if (icp_stats.ICPs==NULL)
308  {
309  bmrm.exitflag=-2;
310  goto cleanup;
311  }
312 
313  icp_stats.ACPs= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
314  if (icp_stats.ACPs==NULL)
315  {
316  bmrm.exitflag=-2;
317  goto cleanup;
318  }
319 
320  /* Temporary buffers for ICP removal */
321  icp_stats.H_buff= (float64_t*) LIBBMRM_CALLOC(BufSize*BufSize, float64_t);
322  if (icp_stats.H_buff==NULL)
323  {
324  bmrm.exitflag=-2;
325  goto cleanup;
326  }
327 
328  map= (bool*) LIBBMRM_CALLOC(BufSize, bool);
329 
330  if (map==NULL)
331  {
332  bmrm.exitflag=-2;
333  goto cleanup;
334  }
335 
336  memset( (bool*) map, true, BufSize);
337 
338  cp_list= (bmrm_ll*) LIBBMRM_CALLOC(1, bmrm_ll);
339 
340  if (cp_list==NULL)
341  {
342  bmrm.exitflag=-2;
343  goto cleanup;
344  }
345 
346  prevW= (float64_t*) LIBBMRM_CALLOC(nDim, float64_t);
347 
348  if (prevW==NULL)
349  {
350  bmrm.exitflag=-2;
351  goto cleanup;
352  }
353 
354  bmrm.hist_Fp = SGVector< float64_t >(histSize);
355  bmrm.hist_Fd = SGVector< float64_t >(histSize);
356  bmrm.hist_wdist = SGVector< float64_t >(histSize);
357 
358  /* Iinitial solution */
359  R=machine->risk(subgrad, W);
360 
361  bmrm.nCP=0;
362  bmrm.nIter=0;
363  bmrm.exitflag=0;
364 
365  b[0]=-R;
366 
367  /* Cutting plane auxiliary double linked list */
368 
369  LIBBMRM_MEMCPY(A, subgrad, nDim*sizeof(float64_t));
370  map[0]=false;
371  cp_list->address=&A[0];
372  cp_list->idx=0;
373  cp_list->prev=NULL;
374  cp_list->next=NULL;
375  CPList_head=cp_list;
376  CPList_tail=cp_list;
377 
378  /* Compute initial value of Fp, Fd, assuming that W is zero vector */
379 
380  sq_norm_W=0;
381  bmrm.Fp=R+0.5*_lambda*sq_norm_W;
382  bmrm.Fd=-LIBBMRM_PLUS_INF;
383 
384  tstop=ttime.cur_time_diff(false);
385 
386  /* Verbose output */
387  SG_SINFO("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
388  bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, R);
389 
390  /* store Fp, Fd and wdist history */
391  bmrm.hist_Fp[0]=bmrm.Fp;
392  bmrm.hist_Fd[0]=bmrm.Fd;
393  bmrm.hist_wdist[0]=0.0;
394 
395  if (store_train_info)
396  helper = machine->get_helper();
397 
398  /* main loop */
399  ASSERT(bmrm.nCP<BufSize);
400  while (bmrm.exitflag==0)
401  {
402  tstart=ttime.cur_time_diff(false);
403  bmrm.nIter++;
404 
405  /* Update H */
406 
407  if (bmrm.nCP>0)
408  {
409  A_2=get_cutting_plane(CPList_tail);
410  cp_ptr=CPList_head;
411 
412  for (uint32_t i=0; i<bmrm.nCP; ++i)
413  {
414  A_1=get_cutting_plane(cp_ptr);
415  cp_ptr=cp_ptr->next;
416  rsum= SGVector<float64_t>::dot(A_1, A_2, nDim);
417 
418  H[LIBBMRM_INDEX(bmrm.nCP, i, BufSize)]
419  = H[LIBBMRM_INDEX(i, bmrm.nCP, BufSize)]
420  = rsum/_lambda;
421  }
422  }
423 
424  A_2=get_cutting_plane(CPList_tail);
425  rsum = SGVector<float64_t>::dot(A_2, A_2, nDim);
426 
427  H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)]=rsum/_lambda;
428 
429  diag_H[bmrm.nCP]=H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)];
430  I[bmrm.nCP]=1;
431 
432  beta[bmrm.nCP]=0.0; // [beta; 0]
433  bmrm.nCP++;
434  ASSERT(bmrm.nCP<BufSize);
435 
436 #if 0
437  /* TODO: scaling...*/
438  float64_t scale = SGVector<float64_t>::max(diag_H, BufSize)/(1000.0*_lambda);
439  SGVector<float64_t> sb(bmrm.nCP);
440  sb.zero();
441  sb.vec1_plus_scalar_times_vec2(sb.vector, 1/scale, b, bmrm.nCP);
442 
443  SGVector<float64_t> sh(bmrm.nCP);
444  sh.zero();
445  sb.vec1_plus_scalar_times_vec2(sh.vector, 1/scale, diag_H, bmrm.nCP);
446 
447  qp_exitflag =
448  libqp_splx_solver(&get_col, sh.vector, sb.vector, &C, I, &S, beta,
449  bmrm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
450 #else
451  /* call QP solver */
452  qp_exitflag=libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, beta,
453  bmrm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
454 #endif
455 
456  bmrm.qp_exitflag=qp_exitflag.exitflag;
457 
458  /* Update ICPcounter (add one to unused and reset used)
459  * + compute number of active CPs */
460  bmrm.nzA=0;
461 
462  for (uint32_t aaa=0; aaa<bmrm.nCP; ++aaa)
463  {
464  if (beta[aaa]>epsilon)
465  {
466  ++bmrm.nzA;
467  icp_stats.ICPcounter[aaa]=0;
468  }
469  else
470  {
471  icp_stats.ICPcounter[aaa]+=1;
472  }
473  }
474 
475  /* W update */
476  memset(W, 0, sizeof(float64_t)*nDim);
477  cp_ptr=CPList_head;
478  for (uint32_t j=0; j<bmrm.nCP; ++j)
479  {
480  A_1=get_cutting_plane(cp_ptr);
481  cp_ptr=cp_ptr->next;
482  SGVector<float64_t>::vec1_plus_scalar_times_vec2(W, -beta[j]/_lambda, A_1, nDim);
483  }
484 
485  /* risk and subgradient computation */
486  R = machine->risk(subgrad, W);
487  add_cutting_plane(&CPList_tail, map, A,
488  find_free_idx(map, BufSize), subgrad, nDim);
489 
490  sq_norm_W=SGVector<float64_t>::dot(W, W, nDim);
491  b[bmrm.nCP]=SGVector<float64_t>::dot(subgrad, W, nDim) - R;
492 
493  sq_norm_Wdiff=0.0;
494  for (uint32_t j=0; j<nDim; ++j)
495  {
496  sq_norm_Wdiff+=(W[j]-prevW[j])*(W[j]-prevW[j]);
497  }
498 
499  bmrm.Fp=R+0.5*_lambda*sq_norm_W;
500  bmrm.Fd=-qp_exitflag.QP;
501  wdist=CMath::sqrt(sq_norm_Wdiff);
502 
503  /* Stopping conditions */
504  if (bmrm.Fp - bmrm.Fd <= TolRel*LIBBMRM_ABS(bmrm.Fp))
505  bmrm.exitflag=1;
506 
507  if (bmrm.Fp - bmrm.Fd <= TolAbs)
508  bmrm.exitflag=2;
509 
510  tstop=ttime.cur_time_diff(false);
511 
512  /* Verbose output */
513  SG_SINFO("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d\n",
514  bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, bmrm.Fp-bmrm.Fd,
515  (bmrm.Fp-bmrm.Fd)/bmrm.Fp, R, bmrm.nCP, bmrm.nzA, qp_exitflag.exitflag);
516 
517  // iteration exceeds histSize
518  if (bmrm.nIter >= histSize)
519  {
520  histSize += BufSize;
521  bmrm.hist_Fp.resize_vector(histSize);
522  bmrm.hist_Fd.resize_vector(histSize);
523  bmrm.hist_wdist.resize_vector(histSize);
524  }
525 
526  /* Keep Fp, Fd and w_dist history */
527  ASSERT(bmrm.nIter < histSize);
528  bmrm.hist_Fp[bmrm.nIter]=bmrm.Fp;
529  bmrm.hist_Fd[bmrm.nIter]=bmrm.Fd;
530  bmrm.hist_wdist[bmrm.nIter]=wdist;
531 
532  /* keep W (for wdist history track) */
533  LIBBMRM_MEMCPY(prevW, W, nDim*sizeof(float64_t));
534 
535  /* Inactive Cutting Planes (ICP) removal */
536  if (cleanICP)
537  {
538  clean_icp(&icp_stats, bmrm, &CPList_head, &CPList_tail, H, diag_H, beta, map, cleanAfter, b, I);
539  ASSERT(bmrm.nCP<BufSize);
540  }
541 
542  // next CP would exceed BufSize
543  if (bmrm.nCP+1 >= BufSize)
544  bmrm.exitflag=-1;
545 
546  /* Debug: compute objective and training error */
547  if (store_train_info)
548  {
549  float64_t info_tstart=ttime.cur_time_diff(false);
550 
551  SGVector<float64_t> w_info(W, nDim, false);
552  float64_t primal = CSOSVMHelper::primal_objective(w_info, model, _lambda);
553  float64_t train_error = CSOSVMHelper::average_loss(w_info, model);
554  helper->add_debug_info(primal, bmrm.nIter, train_error);
555 
556  float64_t info_tstop=ttime.cur_time_diff(false);
557 
558  SG_SINFO("On iteration %4d, tim=%.3lf, primal=%.3lf, train_error=%lf\n", bmrm.nIter, info_tstop-info_tstart, primal, train_error);
559  }
560 
561  } /* end of main loop */
562 
563  if (store_train_info)
564  {
565  helper->terminate();
566  SG_UNREF(helper);
567  }
568 
569  ASSERT(bmrm.nIter+1 <= histSize);
570  bmrm.hist_Fp.resize_vector(bmrm.nIter+1);
571  bmrm.hist_Fd.resize_vector(bmrm.nIter+1);
572  bmrm.hist_wdist.resize_vector(bmrm.nIter+1);
573 
574  cp_ptr=CPList_head;
575 
576  while(cp_ptr!=NULL)
577  {
578  cp_ptr2=cp_ptr;
579  cp_ptr=cp_ptr->next;
580  LIBBMRM_FREE(cp_ptr2);
581  cp_ptr2=NULL;
582  }
583 
584  cp_list=NULL;
585 
586 cleanup:
587 
588  LIBBMRM_FREE(H);
589  LIBBMRM_FREE(b);
590  LIBBMRM_FREE(beta);
591  LIBBMRM_FREE(A);
592  LIBBMRM_FREE(subgrad);
593  LIBBMRM_FREE(diag_H);
594  LIBBMRM_FREE(I);
595  LIBBMRM_FREE(icp_stats.ICPcounter);
596  LIBBMRM_FREE(icp_stats.ICPs);
597  LIBBMRM_FREE(icp_stats.ACPs);
598  LIBBMRM_FREE(icp_stats.H_buff);
599  LIBBMRM_FREE(map);
600  LIBBMRM_FREE(prevW);
601 
602  if (cp_list)
603  LIBBMRM_FREE(cp_list);
604 
605  SG_UNREF(model);
606 
607  return(bmrm);
608 }
609 }

SHOGUN Machine Learning Toolbox - Documentation