SHOGUN  6.0.0
SVMLight.cpp
1 /***********************************************************************/
2 /* */
3 /* SVMLight.cpp */
4 /* */
5 /* Author: Thorsten Joachims */
6 /* Date: 19.07.99 */
7 /* */
8 /* Copyright (c) 1999 Universitaet Dortmund - All rights reserved */
9 /* */
10 /* This software is available for non-commercial use only. It must */
11 /* not be modified and distributed without prior permission of the */
12 /* author. The author is not responsible for implications from the */
13 /* use of this software. */
14 /* */
15 /* THIS INCLUDES THE FOLLOWING ADDITIONS */
16 /* Generic Kernel Interfacing: Soeren Sonnenburg */
17 /* Parallelizations: Soeren Sonnenburg */
18 /* Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg, */
19 /* Alexander Zien, Marius Kloft, Chen Guohua */
20 /* Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg */
21 /* */
22 /***********************************************************************/
23 #include <shogun/lib/config.h>
24 
25 #ifdef USE_SVMLIGHT
26 
27 #include <shogun/io/SGIO.h>
28 #include <shogun/lib/Signal.h>
29 #include <shogun/mathematics/Math.h>
30 #include <shogun/lib/Time.h>
31 #include <shogun/mathematics/lapack.h>
32 
33 #include <shogun/classifier/svm/SVMLight.h>
34 #include <shogun/lib/external/pr_loqo.h>
35 
36 #include <shogun/kernel/Kernel.h>
37 #include <shogun/machine/KernelMachine.h>
38 #include <shogun/kernel/CombinedKernel.h>
39 
40 #ifndef _WIN32
41 #include <unistd.h>
42 #endif
43 
44 #include <shogun/base/Parallel.h>
45 #include <shogun/labels/BinaryLabels.h>
46 
47 #include <stdio.h>
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51 #include <time.h>
52 
53 #ifdef HAVE_PTHREAD
54 #include <pthread.h>
55 #endif
56 
57 using namespace shogun;
58 
59 #ifndef DOXYGEN_SHOULD_SKIP_THIS
60 struct S_THREAD_PARAM_REACTIVATE_LINADD
61 {
62  CKernel* kernel;
63  float64_t* lin;
64  float64_t* last_lin;
65  int32_t* active;
66  int32_t* docs;
67  int32_t start;
68  int32_t end;
69 };
70 
71 struct S_THREAD_PARAM_SVMLIGHT
72 {
73  float64_t * lin ;
74  float64_t* W;
75  int32_t start, end;
76  int32_t * active2dnum ;
77  int32_t * docs ;
78  CKernel* kernel ;
79 };
80 
81 struct S_THREAD_PARAM_REACTIVATE_VANILLA
82 {
83  CKernel* kernel;
84  float64_t* lin;
85  float64_t* aicache;
86  float64_t* a;
87  float64_t* a_old;
88  int32_t* changed2dnum;
89  int32_t* inactive2dnum;
90  int32_t* label;
91  int32_t start;
92  int32_t end;
93 };
94 
95 struct S_THREAD_PARAM_KERNEL
96 {
97  float64_t *Kval ;
98  int32_t *KI, *KJ ;
99  int32_t start, end;
100  CSVMLight* svmlight;
101 };
102 
103 #endif // DOXYGEN_SHOULD_SKIP_THIS
104 
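/* pthread helpers used by the parallelized code below: each worker receives
   a parameter struct and processes only its [start,end) slice of the index
   arrays, writing to disjoint entries of the output buffers, so the threads
   need no locking. */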
105 void* CSVMLight::update_linear_component_linadd_helper(void* p)
106 {
107  S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;
108 
109  int32_t jj=0, j=0 ;
110 
111  for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
112  params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);
113 
114  return NULL ;
115 }
116 
117 void* CSVMLight::compute_kernel_helper(void* p)
118 {
119  S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;
120 
121  int32_t jj=0 ;
122  for (jj=params->start;jj<params->end;jj++)
123  params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]) ;
124 
125  return NULL ;
126 }
127 
128 CSVMLight::CSVMLight()
129 : CSVM()
130 {
131  init();
132  set_kernel(NULL);
133 }
134 
135 CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
136 : CSVM(C, k, lab)
137 {
138  init();
139 }
140 
141 void CSVMLight::init()
142 {
143  //optimizer settings
144  primal=NULL;
145  dual=NULL;
146  init_margin=0.15;
147  init_iter=500;
148  precision_violations=0;
149  model_b=0;
150  verbosity=1;
151  opt_precision=DEF_PRECISION;
152 
153  // svm variables
154  W=NULL;
155  model=SG_MALLOC(MODEL, 1);
156  learn_parm=SG_MALLOC(LEARN_PARM, 1);
157  model->supvec=NULL;
158  model->alpha=NULL;
159  model->index=NULL;
160 
161  // MKL stuff
162  mymaxdiff=1 ;
163  mkl_converged=false;
164 }
165 
166 CSVMLight::~CSVMLight()
167 {
168 
169  SG_FREE(model->supvec);
170  SG_FREE(model->alpha);
171  SG_FREE(model->index);
172  SG_FREE(model);
173  SG_FREE(learn_parm);
174 
175  // MKL stuff
176  SG_FREE(W);
177 
178  // optimizer variables
179  SG_FREE(dual);
180  SG_FREE(primal);
181 }
182 
183 bool CSVMLight::train_machine(CFeatures* data)
184 {
185  //certain setup params
186  mkl_converged=false;
187  verbosity=1 ;
188  init_margin=0.15;
189  init_iter=500;
190  precision_violations=0;
191  opt_precision=DEF_PRECISION;
192 
193  strcpy (learn_parm->predfile, "");
194  learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
195  learn_parm->sharedslack=0;
196  learn_parm->remove_inconsistent=0;
197  learn_parm->skip_final_opt_check=0;
198  learn_parm->svm_maxqpsize=get_qpsize();
199  learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
200  learn_parm->maxiter=100000;
201  learn_parm->svm_iter_to_shrink=100;
202  learn_parm->svm_c=C1;
203  learn_parm->transduction_posratio=0.33;
204  learn_parm->svm_costratio=C2/C1;
205  learn_parm->svm_costratio_unlab=1.0;
206  learn_parm->svm_unlabbound=1E-5;
207  learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ??
208  learn_parm->epsilon_a=1E-15;
209  learn_parm->compute_loo=0;
210  learn_parm->rho=1.0;
211  learn_parm->xa_depth=0;
212 
213  if (!kernel)
214  SG_ERROR("SVM_light can not proceed without kernel!\n")
215 
216  if (data)
217  {
218  if (m_labels->get_num_labels() != data->get_num_vectors())
219  {
220  SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
221  " not match number of labels (%d)\n", get_name(),
222  data->get_num_vectors(), m_labels->get_num_labels());
223  }
224  kernel->init(data, data);
225  }
226 
227  if (!kernel->has_features())
228  SG_ERROR("SVM_light can not proceed without initialized kernel!\n")
229 
230  ASSERT(m_labels && m_labels->get_num_labels())
231  ASSERT(m_labels->get_label_type() == LT_BINARY)
232  ASSERT(kernel->get_num_vec_lhs()==m_labels->get_num_labels())
233 
234  // in case of LINADD enabled kernels cleanup!
235  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
236  kernel->clear_normal() ;
237 
238  // output some info
239  SG_DEBUG("threads = %i\n", parallel->get_num_threads())
240  SG_DEBUG("qpsize = %i\n", learn_parm->svm_maxqpsize)
241  SG_DEBUG("epsilon = %1.1e\n", learn_parm->epsilon_crit)
242  SG_DEBUG("kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD))
243  SG_DEBUG("kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION))
244  SG_DEBUG("kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION))
245  SG_DEBUG("kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" )
246  SG_DEBUG("get_solver_type() = %i\n", get_solver_type())
247  SG_DEBUG("get_linadd_enabled() = %i\n", get_linadd_enabled())
248  SG_DEBUG("get_batch_computation_enabled() = %i\n", get_batch_computation_enabled())
249  SG_DEBUG("kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels())
250 
251  use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
252  (get_linadd_enabled() && kernel->has_property(KP_LINADD)));
253 
254  SG_DEBUG("use_kernel_cache = %i\n", use_kernel_cache)
255 
256  if (kernel->get_kernel_type() == K_COMBINED)
257  {
258 
259  for (index_t k_idx=0; k_idx<((CCombinedKernel*) kernel)->get_num_kernels(); k_idx++)
260  {
261  CKernel* kn = ((CCombinedKernel*) kernel)->get_kernel(k_idx);
262  // allocate kernel cache but clean up beforehand
263  kn->resize_kernel_cache(kn->get_cache_size());
264  SG_UNREF(kn);
265  }
266  }
267 
268  kernel->resize_kernel_cache(kernel->get_cache_size());
269 
270  // train the svm
271  svm_learn();
272 
273  // brain damaged svm light work around
274  create_new_model(model->sv_num-1);
275  set_bias(-model->b);
276  for (int32_t i=0; i<model->sv_num-1; i++)
277  {
278  set_alpha(i, model->alpha[i+1]);
279  set_support_vector(i, model->supvec[i+1]);
280  }
281 
282  // in case of LINADD enabled kernels cleanup!
283  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
284  {
285  kernel->clear_normal() ;
286  kernel->delete_optimization() ;
287  }
288 
289  if (use_kernel_cache)
290  kernel->kernel_cache_cleanup();
291 
292  return true ;
293 }
294 
295 int32_t CSVMLight::get_runtime()
296 {
297  clock_t start;
298  start = clock();
299  return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC));
300 }
301 
302 
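/* svm_learn() sets up learn_parm, the cost vector and the (initially empty)
   MODEL, seeds the optimizer with any alphas stored from a previous model,
   runs optimize_to_convergence() and reports the final statistics. */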
303 void CSVMLight::svm_learn()
304 {
305  int32_t *inconsistent, i;
306  int32_t misclassified,upsupvecnum;
307  float64_t maxdiff, *lin, *c, *a;
308  int32_t iterations;
309  int32_t trainpos=0, trainneg=0 ;
310  ASSERT(m_labels)
311  SGVector<int32_t> lab=((CBinaryLabels*) m_labels)->get_int_labels();
312  int32_t totdoc=lab.vlen;
313  ASSERT(lab.vector && lab.vlen)
314  int32_t* label=SGVector<int32_t>::clone_vector(lab.vector, lab.vlen);
315 
316  int32_t* docs=SG_MALLOC(int32_t, totdoc);
317  SG_FREE(W);
318  W=NULL;
319  count = 0 ;
320 
321  if (callback)
322  {
323  W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels());
324  for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
325  W[i]=0;
326  }
327 
328  for (i=0; i<totdoc; i++)
329  docs[i]=i;
330 
331  float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */
332  float64_t *a_fullset; /* buffer for storing alpha on full sample in loo */
333  TIMING timing_profile;
334  SHRINK_STATE shrink_state;
335 
336  timing_profile.time_kernel=0;
337  timing_profile.time_opti=0;
338  timing_profile.time_shrink=0;
339  timing_profile.time_update=0;
340  timing_profile.time_model=0;
341  timing_profile.time_check=0;
342  timing_profile.time_select=0;
343 
344  /* make sure -n value is reasonable */
345  if((learn_parm->svm_newvarsinqp < 2)
346  || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
347  learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
348  }
349 
350  init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);
351 
352  inconsistent = SG_MALLOC(int32_t, totdoc);
353  c = SG_MALLOC(float64_t, totdoc);
354  a = SG_MALLOC(float64_t, totdoc);
355  a_fullset = SG_MALLOC(float64_t, totdoc);
356  xi_fullset = SG_MALLOC(float64_t, totdoc);
357  lin = SG_MALLOC(float64_t, totdoc);
358  if (m_linear_term.vlen>0)
359  learn_parm->eps=get_linear_term_array();
360  else
361  {
362  learn_parm->eps=SG_MALLOC(float64_t, totdoc); /* equivalent regression epsilon for classification */
363  SGVector<float64_t>::fill_vector(learn_parm->eps, totdoc, -1.0);
364  }
365 
366  learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);
367 
368  SG_FREE(model->supvec);
369  SG_FREE(model->alpha);
370  SG_FREE(model->index);
371  model->supvec = SG_MALLOC(int32_t, totdoc+2);
372  model->alpha = SG_MALLOC(float64_t, totdoc+2);
373  model->index = SG_MALLOC(int32_t, totdoc+2);
374 
375  model->at_upper_bound=0;
376  model->b=0;
377  model->supvec[0]=0; /* element 0 reserved and empty for now */
378  model->alpha[0]=0;
379  model->totdoc=totdoc;
380 
381  model->kernel=kernel;
382 
383  model->sv_num=1;
384  model->loo_error=-1;
385  model->loo_recall=-1;
386  model->loo_precision=-1;
387  model->xa_error=-1;
388  model->xa_recall=-1;
389  model->xa_precision=-1;
390 
391  for (i=0;i<totdoc;i++) { /* various inits */
392  inconsistent[i]=0;
393  c[i]=0;
394  a[i]=0;
395  lin[i]=0;
396 
397  if(label[i] > 0) {
398  learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
399  fabs((float64_t)label[i]);
400  label[i]=1;
401  trainpos++;
402  }
403  else if(label[i] < 0) {
404  learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
405  label[i]=-1;
406  trainneg++;
407  }
408  else {
409  learn_parm->svm_cost[i]=0;
410  }
411  }
412 
413  /* compute starting state for initial alpha values */
414  SG_DEBUG("alpha:%d num_sv:%d\n", m_alpha.vector, get_num_support_vectors())
415 
416  if(m_alpha.vector && get_num_support_vectors()) {
417  if(verbosity>=1) {
418  SG_INFO("Computing starting state...")
419  }
420 
421  float64_t* alpha = SG_MALLOC(float64_t, totdoc);
422 
423  for (i=0; i<totdoc; i++)
424  alpha[i]=0;
425 
426  for (i=0; i<get_num_support_vectors(); i++)
427  alpha[get_support_vector(i)]=get_alpha(i);
428 
429  int32_t* index = SG_MALLOC(int32_t, totdoc);
430  int32_t* index2dnum = SG_MALLOC(int32_t, totdoc+11);
431  float64_t* aicache = SG_MALLOC(float64_t, totdoc);
432  for (i=0;i<totdoc;i++) { /* create full index and clip alphas */
433  index[i]=1;
434  alpha[i]=fabs(alpha[i]);
435  if(alpha[i]<0) alpha[i]=0;
436  if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
437  }
438 
439  if (use_kernel_cache)
440  {
441  if (callback &&
442  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
443  )
444  {
445  CCombinedKernel* k = (CCombinedKernel*) kernel;
446  for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
447  {
448  CKernel* kn = k->get_kernel(k_idx);
449  for (i=0;i<totdoc;i++) // fill kernel cache with unbounded SV
450  if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
451  && (kn->kernel_cache_space_available()))
452  kn->cache_kernel_row(i);
453 
454  for (i=0;i<totdoc;i++) // fill rest of kernel cache with bounded SV
455  if((alpha[i]==learn_parm->svm_cost[i])
456  && (kn->kernel_cache_space_available()))
457  kn->cache_kernel_row(i);
458 
459  SG_UNREF(kn);
460  }
461  }
462  else
463  {
464  for (i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
465  if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
466  && (kernel->kernel_cache_space_available()))
467  kernel->cache_kernel_row(i);
468 
469  for (i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
470  if((alpha[i]==learn_parm->svm_cost[i])
471  && (kernel->kernel_cache_space_available()))
472  kernel->cache_kernel_row(i);
473  }
474  }
475  compute_index(index,totdoc,index2dnum);
476  update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
477  lin,aicache,NULL);
478  calculate_svm_model(docs,label,lin,alpha,a,c,
479  index2dnum,index2dnum);
480  for (i=0;i<totdoc;i++) { /* copy initial alphas */
481  a[i]=alpha[i];
482  }
483 
484  SG_FREE(index);
485  SG_FREE(index2dnum);
486  SG_FREE(aicache);
487  SG_FREE(alpha);
488 
489  if(verbosity>=1)
490  SG_DONE()
491  }
492  SG_DEBUG("%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg)
493  SG_DEBUG("Optimizing...\n")
494 
495  /* train the svm */
496  iterations=optimize_to_convergence(docs,label,totdoc,
497  &shrink_state,inconsistent,a,lin,
498  c,&timing_profile,
499  &maxdiff,(int32_t)-1,
500  (int32_t)1);
501 
502 
503  if(verbosity>=1) {
504  if(verbosity==1)
505  {
506  SG_DONE()
507  SG_DEBUG("(%ld iterations)", iterations)
508  }
509 
510  misclassified=0;
511  for (i=0;(i<totdoc);i++) { /* get final statistic */
512  if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
513  misclassified++;
514  }
515 
516  SG_INFO("Optimization finished (%ld misclassified, maxdiff=%.8f).\n",
517  misclassified,maxdiff);
518 
519  SG_INFO("obj = %.16f, rho = %.16f\n",get_objective(),model->b)
520  if (maxdiff>epsilon)
521  SG_WARNING("maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon)
522 
523  upsupvecnum=0;
524  for (i=1;i<model->sv_num;i++)
525  {
526  if(fabs(model->alpha[i]) >=
527  (learn_parm->svm_cost[model->supvec[i]]-
528  learn_parm->epsilon_a))
529  upsupvecnum++;
530  }
531  SG_INFO("Number of SV: %d (including %d at upper bound)\n",
532  model->sv_num-1,upsupvecnum);
533  }
534 
535  shrink_state_cleanup(&shrink_state);
536  SG_FREE(label);
537  SG_FREE(inconsistent);
538  SG_FREE(c);
539  SG_FREE(a);
540  SG_FREE(a_fullset);
541  SG_FREE(xi_fullset);
542  SG_FREE(lin);
543  SG_FREE(learn_parm->eps);
544  SG_FREE(learn_parm->svm_cost);
545  SG_FREE(docs);
546 }
547 
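/* Core optimization loop: per iteration, select a working set (mostly by
   steepest gradient, occasionally at random), cache the needed kernel rows,
   solve the QP subproblem, update the linear component lin[], re-check the
   KT conditions and, every 10th iteration, shrink the problem. Terminates
   once no variable violates the conditions by more than epsilon_crit. */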
548 int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
549  SHRINK_STATE *shrink_state,
550  int32_t *inconsistent,
551  float64_t *a, float64_t *lin, float64_t *c,
552  TIMING *timing_profile, float64_t *maxdiff,
553  int32_t heldout, int32_t retrain)
554  /* docs: Training vectors (x-part) */
555  /* label: Training labels/value (y-part, zero if test example for
556  transduction) */
557  /* totdoc: Number of examples in docs/label */
558  /* learn_parm: Learning parameters */
559  /* kernel_parm: Kernel parameters */
560  /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
561  NULL if linear. */
562  /* shrink_state: State of active variables */
563  /* inconsistent: examples thrown out as inconsistent */
564  /* a: alphas */
565  /* lin: linear component of gradient */
566  /* c: right hand side of inequalities (margin) */
567  /* maxdiff: returns maximum violation of KT-conditions */
568  /* heldout: marks held-out example for leave-one-out (or -1) */
569  /* retrain: selects training mode (1=regular / 2=holdout) */
570 {
571 
572  int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
573  int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
574  int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
575  int32_t *working2dnum,*selexam;
576  int32_t activenum;
577  float64_t criterion, eq;
578  float64_t *a_old;
579  int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
580  float64_t epsilon_crit_org;
581  float64_t bestmaxdiff;
582  float64_t worstmaxdiff;
583  int32_t bestmaxdiffiter,terminate;
584  bool reactivated=false;
585 
586  float64_t *selcrit; /* buffer for sorting */
587  float64_t *aicache; /* buffer to keep one row of hessian */
588  QP qp; /* buffer for one quadratic program */
589 
590  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
591  if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
592  learn_parm->epsilon_crit=2.0;
593  /* caching makes no sense for linear kernel */
594  }
595  learn_parm->epsilon_shrink=2;
596  (*maxdiff)=1;
597 
598  SG_DEBUG("totdoc:%d\n",totdoc)
599  chosen = SG_MALLOC(int32_t, totdoc);
600  last_suboptimal_at =SG_MALLOC(int32_t, totdoc);
601  key =SG_MALLOC(int32_t, totdoc+11);
602  selcrit =SG_MALLOC(float64_t, totdoc);
603  selexam =SG_MALLOC(int32_t, totdoc);
604  a_old =SG_MALLOC(float64_t, totdoc);
605  aicache =SG_MALLOC(float64_t, totdoc);
606  working2dnum =SG_MALLOC(int32_t, totdoc+11);
607  active2dnum =SG_MALLOC(int32_t, totdoc+11);
608  qp.opt_ce =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
609  qp.opt_ce0 =SG_MALLOC(float64_t, 1);
610  qp.opt_g =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize);
611  qp.opt_g0 =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
612  qp.opt_xinit =SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
613  qp.opt_low=SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
614  qp.opt_up=SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
615 
616  choosenum=0;
617  inconsistentnum=0;
618  if(!retrain) retrain=1;
619  iteration=1;
620  bestmaxdiffiter=1;
621  bestmaxdiff=999999999;
622  worstmaxdiff=1e-10;
623  terminate=0;
624 
625 
626  kernel->set_time(iteration); /* for lru cache */
627 
628  for (i=0;i<totdoc;i++) { /* various inits */
629  chosen[i]=0;
630  a_old[i]=a[i];
631  last_suboptimal_at[i]=1;
632  if(inconsistent[i])
633  inconsistentnum++;
634  }
635  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
636  inactivenum=totdoc-activenum;
637  clear_index(working2dnum);
638 
639  /* repeat this loop until we have convergence */
640  CTime start_time;
641  mkl_converged=false;
642 
643 
644 #ifdef CYGWIN
645  for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
646 #else
647  CSignal::clear_cancel();
648  for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){
649 #endif
650 
651  if(use_kernel_cache)
652  kernel->set_time(iteration); /* for lru cache */
653 
654  if(verbosity>=2) t0=get_runtime();
655  if(verbosity>=3) {
656  SG_DEBUG("\nSelecting working set... ")
657  }
658 
659  if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
660  learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
661 
662  i=0;
663  for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
664  if((chosen[j]>=(learn_parm->svm_maxqpsize/
665  CMath::min(learn_parm->svm_maxqpsize,
666  learn_parm->svm_newvarsinqp)))
667  || (inconsistent[j])
668  || (j == heldout)) {
669  chosen[j]=0;
670  choosenum--;
671  }
672  else {
673  chosen[j]++;
674  working2dnum[i++]=j;
675  }
676  }
677  working2dnum[i]=-1;
678 
679  if(retrain == 2) {
680  choosenum=0;
681  for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
682  chosen[j]=0;
683  }
684  clear_index(working2dnum);
685  for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
686  if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
687  chosen[i]=99999;
688  choosenum++;
689  a[i]=0;
690  }
691  }
692  if(learn_parm->biased_hyperplane) {
693  eq=0;
694  for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
695  eq+=a[i]*label[i];
696  }
697  for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
698  if((eq*label[i] > 0) && (a[i] > 0)) {
699  chosen[i]=88888;
700  choosenum++;
701  if((eq*label[i]) > a[i]) {
702  eq-=(a[i]*label[i]);
703  a[i]=0;
704  }
705  else {
706  a[i]-=(eq*label[i]);
707  eq=0;
708  }
709  }
710  }
711  }
712  compute_index(chosen,totdoc,working2dnum);
713  }
714  else
715  { /* select working set according to steepest gradient */
716  if(iteration % 101)
717  {
718  already_chosen=0;
719  if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
720  (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
721  {
722  /* select part of the working set from cache */
723  already_chosen=select_next_qp_subproblem_grad(
724  label,a,lin,c,totdoc,
725  (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
726  learn_parm->svm_newvarsinqp)/2),
727  inconsistent,active2dnum,
728  working2dnum,selcrit,selexam,1,
729  key,chosen);
730  choosenum+=already_chosen;
731  }
732  choosenum+=select_next_qp_subproblem_grad(
733  label,a,lin,c,totdoc,
734  CMath::min(learn_parm->svm_maxqpsize-choosenum,
735  learn_parm->svm_newvarsinqp-already_chosen),
736  inconsistent,active2dnum,
737  working2dnum,selcrit,selexam,0,key,
738  chosen);
739  }
740  else { /* once in a while, select a somewhat random working set
741  to get unlocked of infinite loops due to numerical
742  inaccuracies in the core qp-solver */
743  choosenum+=select_next_qp_subproblem_rand(
744  label,a,lin,c,totdoc,
745  CMath::min(learn_parm->svm_maxqpsize-choosenum,
746  learn_parm->svm_newvarsinqp),
747  inconsistent,active2dnum,
748  working2dnum,selcrit,selexam,key,
749  chosen,iteration);
750  }
751  }
752 
753  if(verbosity>=2) {
754  SG_INFO(" %ld vectors chosen\n",choosenum)
755  }
756 
757  if(verbosity>=2) t1=get_runtime();
758 
759  if (use_kernel_cache)
760  {
761  // in case of MKL w/o linadd cache each kernel independently
762  // else if linadd is disabled cache single kernel
763  if ( callback &&
764  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
765  )
766  {
767  CCombinedKernel* k = (CCombinedKernel*) kernel;
768  for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
769  {
770  CKernel* kn = k->get_kernel(k_idx);
771  kn->cache_multiple_kernel_rows(working2dnum, choosenum);
772  SG_UNREF(kn);
773  }
774  }
775  else
776  kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
777  }
778 
779  if(verbosity>=2) t2=get_runtime();
780 
781  if(retrain != 2) {
782  optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
783  totdoc,working2dnum,choosenum,a,lin,c,
784  aicache,&qp,&epsilon_crit_org);
785  }
786 
787  if(verbosity>=2) t3=get_runtime();
788  update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
789  lin,aicache,c);
790 
791  if(verbosity>=2) t4=get_runtime();
792  supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);
793 
794  if(verbosity>=2) t5=get_runtime();
795 
796  for (jj=0;(i=working2dnum[jj])>=0;jj++) {
797  a_old[i]=a[i];
798  }
799 
800  retrain=check_optimality(label,a,lin,c,totdoc,
801  maxdiff,epsilon_crit_org,&misclassified,
802  inconsistent,active2dnum,last_suboptimal_at,
803  iteration);
804 
805  if(verbosity>=2) {
806  t6=get_runtime();
807  timing_profile->time_select+=t1-t0;
808  timing_profile->time_kernel+=t2-t1;
809  timing_profile->time_opti+=t3-t2;
810  timing_profile->time_update+=t4-t3;
811  timing_profile->time_model+=t5-t4;
812  timing_profile->time_check+=t6-t5;
813  }
814 
815  /* checking whether optimizer got stuck */
816  if((*maxdiff) < bestmaxdiff) {
817  bestmaxdiff=(*maxdiff);
818  bestmaxdiffiter=iteration;
819  }
820  if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
821  /* long time no progress? */
822  terminate=1;
823  retrain=0;
824  SG_WARNING("Relaxing KT-Conditions due to slow progress! Terminating!\n")
825  SG_DEBUG("(iteration :%d, bestmaxdiffiter: %d, learn_parm->maxiter: %d)\n", iteration, bestmaxdiffiter, learn_parm->maxiter )
826  }
827 
828  noshrink= (get_shrinking_enabled()) ? 0 : 1;
829 
830  if ((!callback) && (!retrain) && (inactivenum>0) &&
831  ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
832  {
833  t1=get_runtime();
834  SG_DEBUG("reactivating inactive examples\n")
835 
836  reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
837  iteration,inconsistent,
838  docs,aicache,
839  maxdiff);
840  reactivated=true;
841  SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org)
842  /* Update to new active variables. */
843  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
844  inactivenum=totdoc-activenum;
845  /* reset watchdog */
846  bestmaxdiff=(*maxdiff);
847  bestmaxdiffiter=iteration;
848 
849  /* termination criterion */
850  noshrink=1;
851  retrain=0;
852  if((*maxdiff) > learn_parm->epsilon_crit)
853  {
854  SG_INFO("restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit)
855  retrain=1;
856  }
857  timing_profile->time_shrink+=get_runtime()-t1;
858  if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
859  || (verbosity>=2)) {
860  SG_DONE()
861  SG_DEBUG("Number of inactive variables = %ld\n", inactivenum)
862  }
863  }
864 
865  if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
866  learn_parm->epsilon_crit=(*maxdiff);
867  if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
868  learn_parm->epsilon_crit/=4.0;
869  retrain=1;
870  noshrink=1;
871  }
872  if(learn_parm->epsilon_crit<epsilon_crit_org)
873  learn_parm->epsilon_crit=epsilon_crit_org;
874 
875  if(verbosity>=2) {
876  SG_INFO(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
877  supvecnum,model->at_upper_bound,(*maxdiff));
878 
879  }
880  mymaxdiff=*maxdiff ;
881 
882  //don't shrink w/ mkl
883  if (((iteration % 10) == 0) && (!noshrink) && !callback)
884  {
885  activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
886  CMath::max((int32_t)(activenum/10),
887  CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
888  a,inconsistent, c, lin, label);
889 
890  inactivenum=totdoc-activenum;
891 
892  if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
893  && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {
894 
895  kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
896  (int32_t) (kernel->get_activenum_cache()-supvecnum)),
897  shrink_state->active);
898  }
899  }
900 
901  if (bestmaxdiff>worstmaxdiff)
902  worstmaxdiff=bestmaxdiff;
903 
904  SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6)
905 
906  /* Terminate loop */
907  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) {
908  terminate = 1;
909  retrain = 0;
910  }
911 
912  } /* end of loop */
913 
914  SG_DEBUG("inactive:%d\n", inactivenum)
915 
916  if (inactivenum && !reactivated && !callback)
917  {
918  SG_DEBUG("reactivating inactive examples\n")
919  reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
920  iteration,inconsistent,
921  docs,aicache,
922  maxdiff);
923  SG_DEBUG("done reactivating inactive examples\n")
924  /* Update to new active variables. */
925  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
926  inactivenum=totdoc-activenum;
927  /* reset watchdog */
928  bestmaxdiff=(*maxdiff);
929  bestmaxdiffiter=iteration;
930  }
931 
932  //use this for our purposes!
933  criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
934  CSVM::set_objective(criterion);
935 
936  SG_FREE(chosen);
937  SG_FREE(last_suboptimal_at);
938  SG_FREE(key);
939  SG_FREE(selcrit);
940  SG_FREE(selexam);
941  SG_FREE(a_old);
942  SG_FREE(aicache);
943  SG_FREE(working2dnum);
944  SG_FREE(active2dnum);
945  SG_FREE(qp.opt_ce);
946  SG_FREE(qp.opt_ce0);
947  SG_FREE(qp.opt_g);
948  SG_FREE(qp.opt_g0);
949  SG_FREE(qp.opt_xinit);
950  SG_FREE(qp.opt_low);
951  SG_FREE(qp.opt_up);
952 
953  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
954 
955  return(iteration);
956 }
957 
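/* Objective value on the active variables. With the classification defaults
   (eps[i]=-1, c[i]=0) the sum below is the dual objective in minimization
   form:
      criterion = sum_i (eps_i - y_i*c_i)*a_i + 0.5*sum_i y_i*a_i*lin_i,
   where lin_i = sum_j y_j*a_j*K(i,j). */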
958 float64_t CSVMLight::compute_objective_function(
959  float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
960  int32_t totdoc)
961  /* Return value of objective function. */
962  /* Works only relative to the active variables! */
963 {
964  /* calculate value of objective function */
965  float64_t criterion=0;
966 
967  for (int32_t i=0;i<totdoc;i++)
968  criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
969 
970  return(criterion);
971 }
972 
973 
974 void CSVMLight::clear_index(int32_t *index)
975  /* initializes and empties index */
976 {
977  index[0]=-1;
978 }
979 
980 void CSVMLight::add_to_index(int32_t *index, int32_t elem)
981  /* appends an element to the index */
982 {
983  register int32_t i;
984  for (i=0;index[i] != -1;i++);
985  index[i]=elem;
986  index[i+1]=-1;
987 }
988 
989 int32_t CSVMLight::compute_index(
990  int32_t *binfeature, int32_t range, int32_t *index)
991  /* create an inverted index of binfeature */
992 {
993  register int32_t i,ii;
994 
995  ii=0;
996  for (i=0;i<range;i++) {
997  if(binfeature[i]) {
998  index[ii]=i;
999  ii++;
1000  }
1001  }
1002  for (i=0;i<4;i++) {
1003  index[ii+i]=-1;
1004  }
1005  return(ii);
1006 }
1007 
1008 
1009 void CSVMLight::optimize_svm(
1010  int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
1011  float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
1012  int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
1013  float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
1014  /* Do optimization on the working set. */
1015 {
1016  int32_t i;
1017  float64_t *a_v;
1018 
1019  //compute_matrices_for_optimization_parallel(docs,label,
1020  // exclude_from_eq_const,eq_target,chosen,
1021  // active2dnum,working2dnum,a,lin,c,
1022  // varnum,totdoc,aicache,qp);
1023 
1023 
1024  compute_matrices_for_optimization(docs,label,
1025  exclude_from_eq_const,eq_target,chosen,
1026  active2dnum,working2dnum,a,lin,c,
1027  varnum,totdoc,aicache,qp);
1028 
1029  if(verbosity>=3) {
1030  SG_DEBUG("Running optimizer...")
1031  }
1032  /* call the qp-subsolver */
1033  a_v=optimize_qp(qp,epsilon_crit_target,
1034  learn_parm->svm_maxqpsize,
1035  &(model->b), /* in case the optimizer gives us */
1036  learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */
1037  /* b is calculated in calculate_model. */
1038  if(verbosity>=3) {
1039  SG_DONE()
1040  }
1041 
1042  for (i=0;i<varnum;i++)
1043  a[working2dnum[i]]=a_v[i];
1044 }
1045 
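/* Parallel variant of the QP subproblem setup: all required kernel entries
   are first enumerated as (KI,KJ) index pairs, evaluated concurrently via
   compute_kernel_helper(), and then assembled into the quadratic matrix
   opt_g, the linear part opt_g0 and the box/equality constraints. With a
   single thread it falls through to the sequential version. */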
1046 void CSVMLight::compute_matrices_for_optimization_parallel(
1047  int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
1048  float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
1049  float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
1050  float64_t *aicache, QP *qp)
1051 {
1052  // TODO: port to use OpenMP backend instead of pthread
1053 #ifdef HAVE_PTHREAD
1054  int32_t num_threads=parallel->get_num_threads();
1055 #else
1056  int32_t num_threads=1;
1057 #endif
1058  if (num_threads < 2)
1059  {
1060  compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
1061  chosen, active2dnum, key, a, lin, c,
1062  varnum, totdoc, aicache, qp) ;
1063  }
1064 #ifdef HAVE_PTHREAD
1065  else
1066  {
1067  register int32_t ki,kj,i,j;
1068  register float64_t kernel_temp;
1069 
1070  qp->opt_n=varnum;
1071  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
1072  for (j=1;j<model->sv_num;j++) { /* start at 1 */
1073  if((!chosen[model->supvec[j]])
1074  && (!exclude_from_eq_const[(model->supvec[j])])) {
1075  qp->opt_ce0[0]+=model->alpha[j];
1076  }
1077  }
1078  if(learn_parm->biased_hyperplane)
1079  qp->opt_m=1;
1080  else
1081  qp->opt_m=0; /* eq-constraint will be ignored */
1082 
1083  /* init linear part of objective function */
1084  for (i=0;i<varnum;i++) {
1085  qp->opt_g0[i]=lin[key[i]];
1086  }
1087 
1088  ASSERT(num_threads>1)
1089  int32_t *KI=SG_MALLOC(int32_t, varnum*varnum);
1090  int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum);
1091  int32_t Knum=0 ;
1092  float64_t *Kval = SG_MALLOC(float64_t, varnum*(varnum+1)/2);
1093  for (i=0;i<varnum;i++) {
1094  ki=key[i];
1095  KI[Knum]=docs[ki] ;
1096  KJ[Knum]=docs[ki] ;
1097  Knum++ ;
1098  for (j=i+1;j<varnum;j++)
1099  {
1100  kj=key[j];
1101  KI[Knum]=docs[ki] ;
1102  KJ[Knum]=docs[kj] ;
1103  Knum++ ;
1104  }
1105  }
1106  ASSERT(Knum<=varnum*(varnum+1)/2)
1107 
1108  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
1109  S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, num_threads-1);
1110  int32_t step= Knum/num_threads;
1111  //SG_DEBUG("\nkernel-step size: %i\n", step)
1112  for (int32_t t=0; t<num_threads-1; t++)
1113  {
1114  params[t].svmlight = this;
1115  params[t].start = t*step;
1116  params[t].end = (t+1)*step;
1117  params[t].KI=KI ;
1118  params[t].KJ=KJ ;
1119  params[t].Kval=Kval ;
1120  pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
1121  }
1122  for (i=params[num_threads-2].end; i<Knum; i++)
1123  Kval[i]=compute_kernel(KI[i],KJ[i]) ;
1124 
1125  for (int32_t t=0; t<num_threads-1; t++)
1126  pthread_join(threads[t], NULL);
1127 
1128  SG_FREE(params);
1129  SG_FREE(threads);
1130 
1131  Knum=0 ;
1132  for (i=0;i<varnum;i++) {
1133  ki=key[i];
1134 
1135  /* Compute the matrix for equality constraints */
1136  qp->opt_ce[i]=label[ki];
1137  qp->opt_low[i]=0;
1138  qp->opt_up[i]=learn_parm->svm_cost[ki];
1139 
1140  kernel_temp=Kval[Knum] ;
1141  Knum++ ;
1142  /* compute linear part of objective function */
1143  qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1144  /* compute quadratic part of objective function */
1145  qp->opt_g[varnum*i+i]=kernel_temp;
1146 
1147  for (j=i+1;j<varnum;j++) {
1148  kj=key[j];
1149  kernel_temp=Kval[Knum] ;
1150  Knum++ ;
1151  /* compute linear part of objective function */
1152  qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
1153  qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1154  /* compute quadratic part of objective function */
1155  qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1156  qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1157  }
1158 
1159  if(verbosity>=3) {
1160  if(i % 20 == 0) {
1161  SG_DEBUG("%ld..",i)
1162  }
1163  }
1164  }
1165 
1166  SG_FREE(KI);
1167  SG_FREE(KJ);
1168  SG_FREE(Kval);
1169 
1170  for (i=0;i<varnum;i++) {
1171  /* assure starting at feasible point */
1172  qp->opt_xinit[i]=a[key[i]];
1173  /* set linear part of objective function */
1174  qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
1175  }
1176 
1177  if(verbosity>=3) {
1178  SG_DONE()
1179  }
1180  }
1181 #endif
1182 }
1183 
1184 void CSVMLight::compute_matrices_for_optimization(
1185  int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
1186  float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
1187  float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
1188  float64_t *aicache, QP *qp)
1189 {
1190  register int32_t ki,kj,i,j;
1191  register float64_t kernel_temp;
1192 
1193  qp->opt_n=varnum;
1194  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
1195  for (j=1;j<model->sv_num;j++) { /* start at 1 */
1196  if((!chosen[model->supvec[j]])
1197  && (!exclude_from_eq_const[(model->supvec[j])])) {
1198  qp->opt_ce0[0]+=model->alpha[j];
1199  }
1200  }
1201  if(learn_parm->biased_hyperplane)
1202  qp->opt_m=1;
1203  else
1204  qp->opt_m=0; /* eq-constraint will be ignored */
1205 
1206  /* init linear part of objective function */
1207  for (i=0;i<varnum;i++) {
1208  qp->opt_g0[i]=lin[key[i]];
1209  }
1210 
1211  for (i=0;i<varnum;i++) {
1212  ki=key[i];
1213 
1214  /* Compute the matrix for equality constraints */
1215  qp->opt_ce[i]=label[ki];
1216  qp->opt_low[i]=0;
1217  qp->opt_up[i]=learn_parm->svm_cost[ki];
1218 
1219  kernel_temp=compute_kernel(docs[ki], docs[ki]);
1220  /* compute linear part of objective function */
1221  qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1222  /* compute quadratic part of objective function */
1223  qp->opt_g[varnum*i+i]=kernel_temp;
1224 
1225  for (j=i+1;j<varnum;j++) {
1226  kj=key[j];
1227  kernel_temp=compute_kernel(docs[ki], docs[kj]);
1228 
1229  /* compute linear part of objective function */
1230  qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
1231  qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
1232  /* compute quadratic part of objective function */
1233  qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1234  qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];//(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
1235  }
1236 
1237  if(verbosity>=3) {
1238  if(i % 20 == 0) {
1239  SG_DEBUG("%ld..",i)
1240  }
1241  }
1242  }
1243 
1244  for (i=0;i<varnum;i++) {
1245  /* assure starting at feasible point */
1246  qp->opt_xinit[i]=a[key[i]];
1247  /* set linear part of objective function */
1248  qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
1249  }
1250 
1251  if(verbosity>=3) {
1252  SG_DONE()
1253  }
1254 }
1255 
1256 
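/* Maintains the model incrementally: support vectors are added/removed as
   alphas become non-zero/zero in the working set, and the bias b is taken
   from a free SV if one exists, otherwise from the midpoint of the feasible
   interval [b_low, b_high] computed below. */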
1257 int32_t CSVMLight::calculate_svm_model(
1258  int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
1259  float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
1260  /* Compute decision function based on current values */
1261  /* of alpha. */
1262 {
1263  int32_t i,ii,pos,b_calculated=0,first_low,first_high;
1264  float64_t ex_c,b_temp,b_low,b_high;
1265 
1266  if(verbosity>=3) {
1267  SG_DEBUG("Calculating model...")
1268  }
1269 
1270  if(!learn_parm->biased_hyperplane) {
1271  model->b=0;
1272  b_calculated=1;
1273  }
1274 
1275  for (ii=0;(i=working2dnum[ii])>=0;ii++) {
1276  if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
1277  pos=model->index[i];
1278  model->index[i]=-1;
1279  (model->sv_num)--;
1280  model->supvec[pos]=model->supvec[model->sv_num];
1281  model->alpha[pos]=model->alpha[model->sv_num];
1282  model->index[model->supvec[pos]]=pos;
1283  }
1284  else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
1285  model->supvec[model->sv_num]=docs[i];
1286  model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
1287  model->index[i]=model->sv_num;
1288  (model->sv_num)++;
1289  }
1290  else if(a_old[i]==a[i]) { /* nothing to do */
1291  }
1292  else { /* just update alpha */
1293  model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
1294  }
1295 
1296  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
1297  if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
1298  (model->at_upper_bound)--;
1299  }
1300  else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
1301  (model->at_upper_bound)++;
1302  }
1303 
1304  if((!b_calculated)
1305  && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
1306  model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
1307  b_calculated=1;
1308  }
1309  }
1310 
1311  /* No alpha in the working set is free (all are at a bound), so b was not
1312  calculated in the usual way. The following handles this special
1313  case. */
1314  if(learn_parm->biased_hyperplane
1315  && (!b_calculated)
1316  && (model->sv_num-1 == model->at_upper_bound)) {
1317  first_low=1;
1318  first_high=1;
1319  b_low=0;
1320  b_high=0;
1321  for (ii=0;(i=active2dnum[ii])>=0;ii++) {
1322  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
1323  if(a[i]<ex_c) {
1324  if(label[i]>0) {
1325  b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
1326  if((b_temp>b_low) || (first_low)) {
1327  b_low=b_temp;
1328  first_low=0;
1329  }
1330  }
1331  else {
1332  b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
1333  if((b_temp<b_high) || (first_high)) {
1334  b_high=b_temp;
1335  first_high=0;
1336  }
1337  }
1338  }
1339  else {
1340  if(label[i]<0) {
1341  b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
1342  if((b_temp>b_low) || (first_low)) {
1343  b_low=b_temp;
1344  first_low=0;
1345  }
1346  }
1347  else {
1348  b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
1349  if((b_temp<b_high) || (first_high)) {
1350  b_high=b_temp;
1351  first_high=0;
1352  }
1353  }
1354  }
1355  }
1356  if(first_high) {
1357  model->b=-b_low;
1358  }
1359  else if(first_low) {
1360  model->b=-b_high;
1361  }
1362  else {
1363  model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
1364  }
1365  }
1366 
1367  if(verbosity>=3) {
1368  SG_DONE()
1369  }
1370 
1371  return(model->sv_num-1); /* have to subtract one, since element 0 is empty */
1372 }
1373 
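/* KT-condition check over the active set: computes the 'distance'
   y_i*(lin_i - b) for every variable, records the largest violation in
   *maxdiff, and time-stamps variables that are (likely) optimal via
   last_suboptimal_at[] for later shrinking. Returns 1 if further
   optimization is required. */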
1374 int32_t CSVMLight::check_optimality(
1375  int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
1376  float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
1377  int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
1378  int32_t iteration)
1379  /* Check KT-conditions */
1380 {
1381  int32_t i,ii,retrain;
1382  float64_t dist,ex_c,target;
1383 
1384  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
1385  learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
1386  else
1387  learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
1388  retrain=0;
1389  (*maxdiff)=0;
1390  (*misclassified)=0;
1391  for (ii=0;(i=active2dnum[ii])>=0;ii++) {
1392  if((!inconsistent[i]) && label[i]) {
1393  dist=(lin[i]-model->b)*(float64_t)label[i];/* 'distance' from
1394  hyperplane*/
1395  target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
1396  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
1397  if(dist <= 0) {
1398  (*misclassified)++; /* does not work due to deactivation of var */
1399  }
1400  if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
1401  if((dist-target)>(*maxdiff)) /* largest violation */
1402  (*maxdiff)=dist-target;
1403  }
1404  else if((a[i]<ex_c) && (dist < target)) {
1405  if((target-dist)>(*maxdiff)) /* largest violation */
1406  (*maxdiff)=target-dist;
1407  }
1408  /* Count how long a variable was at lower/upper bound (and optimal).*/
1409  /* Variables, which were at the bound and optimal for a long */
1410  /* time are unlikely to become support vectors. In case our */
1411  /* cache is filled up, those variables are excluded to save */
1412  /* kernel evaluations. (See chapter 'Shrinking').*/
1413  if((a[i]>(learn_parm->epsilon_a))
1414  && (a[i]<ex_c)) {
1415  last_suboptimal_at[i]=iteration; /* not at bound */
1416  }
1417  else if((a[i]<=(learn_parm->epsilon_a))
1418  && (dist < (target+learn_parm->epsilon_shrink))) {
1419  last_suboptimal_at[i]=iteration; /* not likely optimal */
1420  }
1421  else if((a[i]>=ex_c)
1422  && (dist > (target-learn_parm->epsilon_shrink))) {
1423  last_suboptimal_at[i]=iteration; /* not likely optimal */
1424  }
1425  }
1426  }
1427 
1428  /* termination criterion */
1429  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
1430  retrain=1;
1431  }
1432  return(retrain);
1433 }
1434 
1435 void CSVMLight::update_linear_component(
1436  int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
1437  float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
1438  float64_t *aicache, float64_t* c)
1439  /* keep track of the linear component */
1440  /* lin of the gradient etc. by updating */
1441  /* based on the change of the variables */
1442  /* in the current working set */
1443 {
1444  register int32_t i=0,ii=0,j=0,jj=0;
1445 
1446  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
1447  {
1448  if (callback)
1449  {
1450  update_linear_component_mkl_linadd(docs, label, active2dnum, a,
1451  a_old, working2dnum, totdoc, lin, aicache);
1452  }
1453  else
1454  {
1455  kernel->clear_normal();
1456 
1457  int32_t num_working=0;
1458  for (ii=0;(i=working2dnum[ii])>=0;ii++) {
1459  if(a[i] != a_old[i]) {
1460  kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
1461  num_working++;
1462  }
1463  }
1464 
1465  if (num_working>0)
1466  {
1467  // TODO: port to use OpenMP backend instead of pthread
1468 #ifdef HAVE_PTHREAD
1469  int32_t num_threads=parallel->get_num_threads();
1470 #else
1471  int32_t num_threads=1;
1472 #endif
1473  if (num_threads < 2)
1474  {
1475  for (jj=0;(j=active2dnum[jj])>=0;jj++) {
1476  lin[j]+=kernel->compute_optimized(docs[j]);
1477  }
1478  }
1479 #ifdef HAVE_PTHREAD
1480  else
1481  {
1482  int32_t num_elem = 0 ;
1483  for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ;
1484 
1485  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
1486  S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, num_threads-1);
1487  int32_t start = 0 ;
1488  int32_t step = num_elem/num_threads;
1489  int32_t end = step ;
1490 
1491  for (int32_t t=0; t<num_threads-1; t++)
1492  {
1493  params[t].kernel = kernel ;
1494  params[t].lin = lin ;
1495  params[t].docs = docs ;
1496  params[t].active2dnum=active2dnum ;
1497  params[t].start = start ;
1498  params[t].end = end ;
1499  start=end ;
1500  end+=step ;
1501  pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]) ;
1502  }
1503 
1504  for (jj=params[num_threads-2].end;(j=active2dnum[jj])>=0;jj++) {
1505  lin[j]+=kernel->compute_optimized(docs[j]);
1506  }
1507  void* ret;
1508  for (int32_t t=0; t<num_threads-1; t++)
1509  pthread_join(threads[t], &ret) ;
1510 
1511  SG_FREE(params);
1512  SG_FREE(threads);
1513  }
1514 #endif
1515  }
1516  }
1517  }
1518  else
1519  {
1520  if (callback)
1521  {
1522  update_linear_component_mkl(docs, label, active2dnum,
1523  a, a_old, working2dnum, totdoc, lin, aicache);
1524  }
1525  else {
1526  for (jj=0;(i=working2dnum[jj])>=0;jj++) {
1527  if(a[i] != a_old[i]) {
1528  kernel->get_kernel_row(i,active2dnum,aicache);
1529  for (ii=0;(j=active2dnum[ii])>=0;ii++)
1530  lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
1531  }
1532  }
1533  }
1534  }
1535 }
1536 
1537 
1538 void CSVMLight::update_linear_component_mkl(
1539  int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
1540  float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
1541  float64_t *aicache)
1542 {
1543  //int inner_iters=0;
1544  int32_t num = kernel->get_num_vec_rhs();
1545  int32_t num_weights = -1;
1546  int32_t num_kernels = kernel->get_num_subkernels() ;
1547  const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
1548  ASSERT(num_weights==num_kernels)
1549 
1550  if ((kernel->get_kernel_type()==K_COMBINED) &&
1551  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights()))// for combined kernel
1552  {
1553  CCombinedKernel* k = (CCombinedKernel*) kernel;
1554 
1555  int32_t n = 0, i, j ;
1556 
1557  for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
1558  {
1559  CKernel* kn = k->get_kernel(k_idx);
1560  for (i=0;i<num;i++)
1561  {
1562  if(a[i] != a_old[i])
1563  {
1564  kn->get_kernel_row(i,NULL,aicache, true);
1565  for (j=0;j<num;j++)
1566  W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
1567  }
1568  }
1569 
1570  SG_UNREF(kn);
1571  n++ ;
1572  }
1573  }
1574  else // hope the kernel is fast ...
1575  {
1576  float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
1577  float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
1578 
1579  // backup and set to zero
1580  for (int32_t i=0; i<num_kernels; i++)
1581  {
1582  w_backup[i] = old_beta[i] ;
1583  w1[i]=0.0 ;
1584  }
1585  for (int32_t n=0; n<num_kernels; n++)
1586  {
1587  w1[n]=1.0 ;
1588  kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights, false));
1589 
1590  for (int32_t i=0;i<num;i++)
1591  {
1592  if(a[i] != a_old[i])
1593  {
1594  for (int32_t j=0;j<num;j++)
1595  W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
1596  }
1597  }
1598  w1[n]=0.0 ;
1599  }
1600 
1601  // restore old weights
1602  kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
1603 
1604  SG_FREE(w_backup);
1605  SG_FREE(w1);
1606  }
1607 
1608  call_mkl_callback(a, label, lin);
1609 }
1610 
1611 
1612 void CSVMLight::update_linear_component_mkl_linadd(
1613  int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
1614  float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
1615  float64_t *aicache)
1616 {
1617  //int inner_iters=0;
1618 
1619  // kernel with LP_LINADD property is assumed to have
1620  // compute_by_subkernel functions
1621  int32_t num = kernel->get_num_vec_rhs();
1622  int32_t num_weights = -1;
1623  int32_t num_kernels = kernel->get_num_subkernels() ;
1624  const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
1625  ASSERT(num_weights==num_kernels)
1626 
1627  float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
1628  float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
1629 
1630  // backup and set to one
1631  for (int32_t i=0; i<num_kernels; i++)
1632  {
1633  w_backup[i] = old_beta[i] ;
1634  w1[i]=1.0 ;
1635  }
1636  // set the kernel weights
1637  kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights, false));
1638 
1639  // create normal update (with changed alphas only)
1640  kernel->clear_normal();
1641  for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
1642  if(a[i] != a_old[i]) {
1643  kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
1644  }
1645  }
1646  // TODO: port to use OpenMP backend instead of pthread
1647 #ifdef HAVE_PTHREAD
1648  int32_t num_threads=parallel->get_num_threads();
1649 #else
1650  int32_t num_threads=1;
1651 #endif
1652 
1653  if (num_threads < 2)
1654  {
1655  // determine contributions of different kernels
1656  for (int32_t i=0; i<num; i++)
1657  kernel->compute_by_subkernel(i,&W[i*num_kernels]);
1658  }
1659 #ifdef HAVE_PTHREAD
1660  else
1661  {
1662  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
1663  S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, num_threads-1);
1664  int32_t step= num/num_threads;
1665 
1666  for (int32_t t=0; t<num_threads-1; t++)
1667  {
1668  params[t].kernel = kernel;
1669  params[t].W = W;
1670  params[t].start = t*step;
1671  params[t].end = (t+1)*step;
1672  pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
1673  }
1674 
1675  for (int32_t i=params[num_threads-2].end; i<num; i++)
1676  kernel->compute_by_subkernel(i,&W[i*num_kernels]);
1677 
1678  for (int32_t t=0; t<num_threads-1; t++)
1679  pthread_join(threads[t], NULL);
1680 
1681  SG_FREE(params);
1682  SG_FREE(threads);
1683  }
1684 #endif
1685 
1686  // restore old weights
1687  kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
1688 
1689  call_mkl_callback(a, label, lin);
1690 }
1691 
1692 void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
1693 {
1694  S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;
1695 
1696  int32_t num_kernels=params->kernel->get_num_subkernels();
1697 
1698  // determine contributions of different kernels
1699  for (int32_t i=params->start; i<params->end; i++)
1700  params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));
1701 
1702  return NULL ;
1703 }
1704 
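/* Bridge to the MKL solver: computes suma = sum_i a_i and the per-subkernel
   terms sumw[d] = 0.5*sum_i a_i*y_i*W[i*num_kernels+d] (via cblas_dgemv when
   LAPACK is available), passes both to the registered callback, which
   updates the subkernel weights beta, and finally recomputes
   lin[i] = sum_d beta_d*W[i*num_kernels+d]. */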
1705 void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
1706 {
1707  int32_t num = kernel->get_num_vec_rhs();
1708  int32_t num_kernels = kernel->get_num_subkernels() ;
1709 
1710  float64_t suma=0;
1711  float64_t* sumw=SG_MALLOC(float64_t, num_kernels);
1712 #ifdef HAVE_LAPACK
1713  int nk = (int) num_kernels; /* calling external lib */
1714  double* alphay = SG_MALLOC(double, num);
1715 
1716  for (int32_t i=0; i<num; i++)
1717  {
1718  alphay[i]=a[i]*label[i];
1719  suma+=a[i];
1720  }
1721 
1722  for (int32_t i=0; i<num_kernels; i++)
1723  sumw[i]=0;
1724 
1725  cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
1726  num_kernels, alphay, 1, 1.0, (double*) sumw, 1);
1727 
1728  SG_FREE(alphay);
1729 #else
1730  for (int32_t i=0; i<num; i++)
1731  suma += a[i];
1732 
1733  for (int32_t d=0; d<num_kernels; d++)
1734  {
1735  sumw[d]=0;
1736  for (int32_t i=0; i<num; i++)
1737  sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
1738  }
1739 #endif
1740 
1741  if (callback)
1742  mkl_converged=callback(mkl, sumw, suma);
1743 
1744 
1745  const float64_t* new_beta = kernel->get_subkernel_weights(num_kernels);
1746 
1747  // update lin
1748 #ifdef HAVE_LAPACK
1749  cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
1750  nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
1751 #else
1752  for (int32_t i=0; i<num; i++)
1753  lin[i]=0 ;
1754  for (int32_t d=0; d<num_kernels; d++)
1755  if (new_beta[d]!=0)
1756  for (int32_t i=0; i<num; i++)
1757  lin[i] += new_beta[d]*W[i*num_kernels+d] ;
1758 #endif
1759 
1760  SG_FREE(sumw);
1761 }
1762 
1763 
1764 /*************************** Working set selection ***************************/
1765 
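/* Both selection routines below pick at most qp_size/2 variables from each
   of the two directions (s = -y_j and s = +y_j), skipping variables that are
   at a bound in the direction of descent, inconsistent, or already chosen;
   select_top_n() then keeps the highest-ranked candidates by selcrit. */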
1766 int32_t CSVMLight::select_next_qp_subproblem_grad(
1767  int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
1768  int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
1769  int32_t *working2dnum, float64_t *selcrit, int32_t *select,
1770  int32_t cache_only, int32_t *key, int32_t *chosen)
1771  /* Use the feasible direction approach to select the next
1772  qp-subproblem (see chapter 'Selecting a good working set'). If
1773  'cache_only' is true, then the variables are selected only among
1774  those for which the kernel evaluations are cached. */
1775 {
1776  int32_t choosenum,i,j,k,activedoc,inum,valid;
1777  float64_t s;
1778 
1779  for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
1780  choosenum=0;
1781  activedoc=0;
1782  for (i=0;(j=active2dnum[i])>=0;i++) {
1783  s=-label[j];
1784  if(cache_only)
1785  {
1786  if (use_kernel_cache)
1787  valid=(kernel->kernel_cache_check(j));
1788  else
1789  valid = 1 ;
1790  }
1791  else
1792  valid=1;
1793  if(valid
1794  && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1795  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1796  && (s>0)))
1797  && (!chosen[j])
1798  && (label[j])
1799  && (!inconsistent[j]))
1800  {
1801  selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
1802  key[activedoc]=j;
1803  activedoc++;
1804  }
1805  }
1806  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1807  for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
1808  i=key[select[k]];
1809  chosen[i]=1;
1810  working2dnum[inum+choosenum]=i;
1811  choosenum+=1;
1812  if (use_kernel_cache)
1813  kernel->kernel_cache_touch(i);
1814  /* make sure it does not get kicked */
1815  /* out of cache */
1816  }
1817 
1818  activedoc=0;
1819  for (i=0;(j=active2dnum[i])>=0;i++) {
1820  s=label[j];
1821  if(cache_only)
1822  {
1823  if (use_kernel_cache)
1824  valid=(kernel->kernel_cache_check(j));
1825  else
1826  valid = 1 ;
1827  }
1828  else
1829  valid=1;
1830  if(valid
1831  && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1832  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1833  && (s>0)))
1834  && (!chosen[j])
1835  && (label[j])
1836  && (!inconsistent[j]))
1837  {
1838  selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
1839  /* selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */
1840  key[activedoc]=j;
1841  activedoc++;
1842  }
1843  }
1844  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1845  for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
1846  i=key[select[k]];
1847  chosen[i]=1;
1848  working2dnum[inum+choosenum]=i;
1849  choosenum+=1;
1850  if (use_kernel_cache)
1851  kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
1852  /* out of cache */
1853  }
1854  working2dnum[inum+choosenum]=-1; /* complete index */
1855  return(choosenum);
1856 }
1857 
1858 int32_t CSVMLight::select_next_qp_subproblem_rand(
1859  int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
1860  int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
1861  int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
1862  int32_t *chosen, int32_t iteration)
1863 /* Use the feasible direction approach to select the next
1864  qp-subproblem (see section 'Selecting a good working set'). Chooses
1865  a feasible direction at (pseudo) random to help jump over numerical
1866  problems. */
1867 {
1868  int32_t choosenum,i,j,k,activedoc,inum;
1869  float64_t s;
1870 
1871  for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
1872  choosenum=0;
1873  activedoc=0;
1874  for (i=0;(j=active2dnum[i])>=0;i++) {
1875  s=-label[j];
1876  if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1877  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1878  && (s>0)))
1879  && (!inconsistent[j])
1880  && (label[j])
1881  && (!chosen[j])) {
1882  selcrit[activedoc]=(j+iteration) % totdoc;
1883  key[activedoc]=j;
1884  activedoc++;
1885  }
1886  }
1887  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1888  for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
1889  i=key[select[k]];
1890  chosen[i]=1;
1891  working2dnum[inum+choosenum]=i;
1892  choosenum+=1;
1893  if (use_kernel_cache)
1894  kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
1895  /* out of cache */
1896  }
1897 
1898  activedoc=0;
1899  for (i=0;(j=active2dnum[i])>=0;i++) {
1900  s=label[j];
1901  if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
1902  && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
1903  && (s>0)))
1904  && (!inconsistent[j])
1905  && (label[j])
1906  && (!chosen[j])) {
1907  selcrit[activedoc]=(j+iteration) % totdoc;
1908  key[activedoc]=j;
1909  activedoc++;
1910  }
1911  }
1912  select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
1913  for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
1914  i=key[select[k]];
1915  chosen[i]=1;
1916  working2dnum[inum+choosenum]=i;
1917  choosenum+=1;
1918  if (use_kernel_cache)
1919  kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
1920  /* out of cache */
1921  }
1922  working2dnum[inum+choosenum]=-1; /* complete index */
1923  return(choosenum);
1924 }
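/* [Editor's note -- illustrative commentary, not part of the original
 * SVMLight sources.] The random variant replaces the gradient score by the
 * rotating priority selcrit[j] = (j+iteration) % totdoc, so the preferred
 * document shifts by one each iteration. For example, with totdoc=5 and
 * iteration=3 the scores for j=0..4 are 3,4,0,1,2 and document 1 is picked
 * first; one iteration later document 0 would win. This deterministic
 * "(pseudo) random" rotation lets the optimizer escape working sets on
 * which the gradient-based selection makes no numerical progress. */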
1925 
1926 
1927 
1928 void CSVMLight::select_top_n(
1929  float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
1930 {
1931  register int32_t i,j;
1932 
1933  for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
1934  for (j=i;j>=0;j--) {
1935  if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
1936  select[j]=select[j-1];
1937  }
1938  else {
1939  select[j]=i;
1940  j=-1;
1941  }
1942  }
1943  }
1944  if(n>0) {
1945  for (i=n;i<range;i++) {
1946  if(selcrit[i]>selcrit[select[n-1]]) {
1947  for (j=n-1;j>=0;j--) {
1948  if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
1949  select[j]=select[j-1];
1950  }
1951  else {
1952  select[j]=i;
1953  j=-1;
1954  }
1955  }
1956  }
1957  }
1958  }
1959 }
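/* [Editor's note -- illustrative sketch, not part of the original SVMLight
 * sources.] select_top_n() is a bounded insertion sort: it writes the
 * indices of the n largest selcrit values into select[0..n-1], best first.
 * Modulo the ordering of ties, a minimal standalone equivalent is: */
#if 0
#include <algorithm>
#include <numeric>
#include <vector>

/* indices of the n largest scores, in descending score order */
static std::vector<int> top_n(const std::vector<double>& selcrit, int n)
{
	std::vector<int> idx(selcrit.size());
	std::iota(idx.begin(), idx.end(), 0); /* 0,1,...,range-1 */
	n = std::min<int>(n, (int) idx.size());
	std::partial_sort(idx.begin(), idx.begin()+n, idx.end(),
		[&](int a, int b) { return selcrit[a] > selcrit[b]; });
	idx.resize(n);
	return idx;
}
#endif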
1960 
1961 
1962 /******************************** Shrinking *********************************/
1963 
1964 void CSVMLight::init_shrink_state(
1965  SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
1966 {
1967  int32_t i;
1968 
1969  shrink_state->deactnum=0;
1970  shrink_state->active = SG_MALLOC(int32_t, totdoc);
1971  shrink_state->inactive_since = SG_MALLOC(int32_t, totdoc);
1972  shrink_state->a_history = SG_MALLOC(float64_t*, maxhistory);
1973  shrink_state->maxhistory=maxhistory;
1974  shrink_state->last_lin = SG_MALLOC(float64_t, totdoc);
1975  shrink_state->last_a = SG_MALLOC(float64_t, totdoc);
1976 
1977  for (i=0;i<totdoc;i++) {
1978  shrink_state->active[i]=1;
1979  shrink_state->inactive_since[i]=0;
1980  shrink_state->last_a[i]=0;
1981  shrink_state->last_lin[i]=0;
1982  }
1983 }
1984 
1985 void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
1986 {
1987  SG_FREE(shrink_state->active);
1988  SG_FREE(shrink_state->inactive_since);
1989  if(shrink_state->deactnum > 0)
1990  SG_FREE((shrink_state->a_history[shrink_state->deactnum-1]));
1991  SG_FREE((shrink_state->a_history));
1992  SG_FREE((shrink_state->last_a));
1993  SG_FREE((shrink_state->last_lin));
1994 }
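/* [Editor's note -- illustrative sketch, not part of the original SVMLight
 * sources.] init_shrink_state() and shrink_state_cleanup() bracket one
 * training run: active[]/inactive_since[] track which variables are
 * currently shrunk, a_history[] keeps one alpha snapshot per shrinking
 * round (non-linadd case only), and last_a[]/last_lin[] let the linadd
 * path update lin[] incrementally on reactivation. A hedged usage sketch,
 * as seen from inside a CSVMLight member function: */
#if 0
SHRINK_STATE shrink_state;
init_shrink_state(&shrink_state, totdoc, (int32_t) MAXSHRINK);
/* ... optimize_to_convergence(...) shrinks and reactivates while iterating ... */
shrink_state_cleanup(&shrink_state);
#endif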
1995 
1996 int32_t CSVMLight::shrink_problem(
1997  SHRINK_STATE *shrink_state, int32_t *active2dnum,
1998  int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
1999  int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
2000  float64_t* lin, int* label)
2001  /* Shrink some variables away. Do the shrinking only if at least
2002  minshrink variables can be removed. */
2003 {
2004  int32_t i,ii,change,activenum,lastiter;
2005  float64_t *a_old=NULL;
2006 
2007  activenum=0;
2008  change=0;
2009  for (ii=0;active2dnum[ii]>=0;ii++) {
2010  i=active2dnum[ii];
2011  activenum++;
2012  lastiter=last_suboptimal_at[i];
2013 
2014  if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
2015  || (inconsistent[i])) {
2016  change++;
2017  }
2018  }
2019  if((change>=minshrink) /* shrink only if sufficiently many candidates */
2020  && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
2021  /* Shrink problem by removing those variables which are */
2022  /* optimal at a bound for a minimum number of iterations */
2023  if(verbosity>=2) {
2024  SG_INFO(" Shrinking...")
2025  }
2026 
2027  if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* non-linear case save alphas */
2028 
2029  a_old=SG_MALLOC(float64_t, totdoc);
2030  shrink_state->a_history[shrink_state->deactnum]=a_old;
2031  for (i=0;i<totdoc;i++) {
2032  a_old[i]=a[i];
2033  }
2034  }
2035  for (ii=0;active2dnum[ii]>=0;ii++) {
2036  i=active2dnum[ii];
2037  lastiter=last_suboptimal_at[i];
2038  if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
2039  || (inconsistent[i])) {
2040  shrink_state->active[i]=0;
2041  shrink_state->inactive_since[i]=shrink_state->deactnum;
2042  }
2043  }
2044  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
2045  shrink_state->deactnum++;
2046  if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
2047  shrink_state->deactnum=0;
2048 
2049  if(verbosity>=2) {
2050  SG_DONE()
2051  SG_DEBUG("Number of inactive variables = %ld\n", totdoc-activenum)
2052  }
2053  }
2054  return(activenum);
2055 }
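/* [Editor's note -- illustrative commentary, not part of the original
 * SVMLight sources.] A variable is shrunk once it has been optimal at a
 * bound for more than svm_iter_to_shrink consecutive iterations: with
 * svm_iter_to_shrink=100, a variable last flagged suboptimal at iteration
 * 50 is deactivated from iteration 151 on. The alpha snapshot pushed onto
 * a_history (non-linadd case) is what reactivate_inactive_examples() later
 * uses to replay the updates lin[j] += (a[i]-a_old[i])*y_i*k(i,j) that the
 * shrunk variables missed. */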
2056 
2057 void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
2058 {
2059  S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;
2060 
2061  CKernel* k = params->kernel;
2062  float64_t* lin = params->lin;
2063  float64_t* last_lin = params->last_lin;
2064  int32_t* active = params->active;
2065  int32_t* docs = params->docs;
2066  int32_t start = params->start;
2067  int32_t end = params->end;
2068 
2069  for (int32_t i=start;i<end;i++)
2070  {
2071  if (!active[i])
2072  lin[i] = last_lin[i]+k->compute_optimized(docs[i]);
2073 
2074  last_lin[i]=lin[i];
2075  }
2076 
2077  return NULL;
2078 }
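/* [Editor's note -- illustrative commentary, not part of the original
 * SVMLight sources.] In the linadd case the changed alphas have already
 * been folded into the kernel's normal vector via add_to_normal(), so each
 * deactivated example needs only one optimized evaluation,
 *
 *     lin[i] = last_lin[i] + f_delta(x_i)   with f_delta = compute_optimized,
 *
 * instead of a full kernel row; last_lin[] is then synchronised for the
 * next shrinking round. */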
2079 
2080 void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
2081 {
2082  S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
2083  ASSERT(params)
2084  ASSERT(params->kernel)
2085  ASSERT(params->lin)
2086  ASSERT(params->aicache)
2087  ASSERT(params->a)
2088  ASSERT(params->a_old)
2089  ASSERT(params->changed2dnum)
2090  ASSERT(params->inactive2dnum)
2091  ASSERT(params->label)
2092 
2093  CKernel* k = params->kernel;
2094  float64_t* lin = params->lin;
2095  float64_t* aicache = params->aicache;
2096  float64_t* a= params->a;
2097  float64_t* a_old = params->a_old;
2098  int32_t* changed2dnum = params->changed2dnum;
2099  int32_t* inactive2dnum = params->inactive2dnum;
2100  int32_t* label = params->label;
2101  int32_t start = params->start;
2102  int32_t end = params->end;
2103 
2104  for (int32_t ii=start;ii<end;ii++)
2105  {
2106  int32_t i=changed2dnum[ii];
2107  int32_t j=0;
2108  ASSERT(i>=0)
2109 
2110  k->get_kernel_row(i,inactive2dnum,aicache);
2111  for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
2112  lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
2113  }
2114  return NULL;
2115 }
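/* [Editor's note -- illustrative commentary, not part of the original
 * SVMLight sources.] The vanilla helper replays, for every alpha that
 * changed while an example was shrunk, the missing contribution to the
 * gradient cache,
 *
 *     lin[j] += (a[i] - a_old[i]) * k(i,j) * y_i   for all inactive j,
 *
 * restoring the invariant lin[j] = sum_i a_i y_i k(i,j). Each thread works
 * on a disjoint slice of changed2dnum and, in the threaded caller below,
 * accumulates into its own tmp_lin buffer to avoid write conflicts. */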
2116 
2117 void CSVMLight::reactivate_inactive_examples(
2118  int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
2119  float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
2120  int32_t* docs, float64_t *aicache, float64_t *maxdiff)
2121  /* Make all variables active again that had been removed by
2122  shrinking. */
2123  /* Computes lin for those variables from scratch. */
2124 {
2125  register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
2126  int32_t *changed,*inactive;
2127  register float64_t *a_old,dist;
2128  float64_t ex_c,target;
2129 
2130  if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */
2131  a_old=shrink_state->last_a;
2132 
2134  {
2135  SG_DEBUG(" clear normal - linadd\n")
2136  kernel->clear_normal();
2137 
2138  int32_t num_modified=0;
2139  for (i=0;i<totdoc;i++) {
2140  if(a[i] != a_old[i]) {
2141  kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
2142  a_old[i]=a[i];
2143  num_modified++;
2144  }
2145  }
2146 
2147  if (num_modified>0)
2148  {
2149  // TODO: port to use OpenMP backend instead of pthread
2150 #ifdef HAVE_PTHREAD
2151  int32_t num_threads=parallel->get_num_threads();
2152 #else
2153  int32_t num_threads=1;
2154 #endif
2155  ASSERT(num_threads>0)
2156  if (num_threads < 2)
2157  {
2158  S_THREAD_PARAM_REACTIVATE_LINADD params;
2159  params.kernel=kernel;
2160  params.lin=lin;
2161  params.last_lin=shrink_state->last_lin;
2162  params.docs=docs;
2163  params.active=shrink_state->active;
2164  params.start=0;
2165  params.end=totdoc;
2166  reactivate_inactive_examples_linadd_helper((void*) &params);
2167  }
2168 #ifdef HAVE_PTHREAD
2169  else
2170  {
2171  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
2172  S_THREAD_PARAM_REACTIVATE_LINADD* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_LINADD, num_threads);
2173  int32_t step= totdoc/num_threads;
2174 
2175  for (t=0; t<num_threads-1; t++)
2176  {
2177  params[t].kernel=kernel;
2178  params[t].lin=lin;
2179  params[t].last_lin=shrink_state->last_lin;
2180  params[t].docs=docs;
2181  params[t].active=shrink_state->active;
2182  params[t].start = t*step;
2183  params[t].end = (t+1)*step;
2184  pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
2185  }
2186 
2187  params[t].kernel=kernel;
2188  params[t].lin=lin;
2189  params[t].last_lin=shrink_state->last_lin;
2190  params[t].docs=docs;
2191  params[t].active=shrink_state->active;
2192  params[t].start = t*step;
2193  params[t].end = totdoc;
2194  reactivate_inactive_examples_linadd_helper((void*) &params[t]);
2195 
2196  for (t=0; t<num_threads-1; t++)
2197  pthread_join(threads[t], NULL);
2198 
2199  SG_FREE(threads);
2200  SG_FREE(params);
2201  }
2202 #endif
2203 
2204  }
2205  }
2206  else
2207  {
2208  float64_t *alphas = SG_MALLOC(float64_t, totdoc);
2209  int32_t *idx = SG_MALLOC(int32_t, totdoc);
2210  int32_t num_suppvec=0 ;
2211 
2212  for (i=0; i<totdoc; i++)
2213  {
2214  if(a[i] != a_old[i])
2215  {
2216  alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
2217  idx[num_suppvec] = i ;
2218  a_old[i] = a[i] ;
2219  num_suppvec++ ;
2220  }
2221  }
2222 
2223  if (num_suppvec>0)
2224  {
2225  int32_t num_inactive=0;
2226  int32_t* inactive_idx=SG_MALLOC(int32_t, totdoc); // in fact we only need a subset
2227 
2228  j=0;
2229  for (i=0;i<totdoc;i++)
2230  {
2231  if(!shrink_state->active[i])
2232  {
2233  inactive_idx[j++] = i;
2234  num_inactive++;
2235  }
2236  }
2237 
2238  if (num_inactive>0)
2239  {
2240  float64_t* dest = SG_MALLOC(float64_t, num_inactive);
2241  memset(dest, 0, sizeof(float64_t)*num_inactive);
2242 
2243  kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);
2244 
2245  j=0;
2246  for (i=0;i<totdoc;i++) {
2247  if(!shrink_state->active[i]) {
2248  lin[i] = shrink_state->last_lin[i] + dest[j++] ;
2249  }
2250  shrink_state->last_lin[i]=lin[i];
2251  }
2252 
2253  SG_FREE(dest);
2254  }
2255  else
2256  {
2257  for (i=0;i<totdoc;i++)
2258  shrink_state->last_lin[i]=lin[i];
2259  }
2260  SG_FREE(inactive_idx);
2261  }
2262  SG_FREE(alphas);
2263  SG_FREE(idx);
2264  }
2265 
2266  kernel->delete_optimization();
2267  }
2268  else
2269  {
2270  changed=SG_MALLOC(int32_t, totdoc);
2271  changed2dnum=SG_MALLOC(int32_t, totdoc+11);
2272  inactive=SG_MALLOC(int32_t, totdoc);
2273  inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
2274  for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
2275  {
2276  if(verbosity>=2) {
2277  SG_INFO("%ld..",t)
2278  }
2279  a_old=shrink_state->a_history[t];
2280  for (i=0;i<totdoc;i++) {
2281  inactive[i]=((!shrink_state->active[i])
2282  && (shrink_state->inactive_since[i] == t));
2283  changed[i]= (a[i] != a_old[i]);
2284  }
2285  compute_index(inactive,totdoc,inactive2dnum);
2286  compute_index(changed,totdoc,changed2dnum);
2287 
2288 
2289  //TODO: re-enable multi-threading: int32_t num_threads=parallel->get_num_threads();
2290  int32_t num_threads=1;
2291  ASSERT(num_threads>0)
2292 
2293  if (num_threads < 2)
2294  {
2295  for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
2296  kernel->get_kernel_row(i,inactive2dnum,aicache);
2297  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
2298  lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
2299  }
2300  }
2301 #ifdef HAVE_PTHREAD
2302  else
2303  {
2304  //find number of the changed ones
2305  int32_t num_changed=0;
2306  for (ii=0;changed2dnum[ii]>=0;ii++)
2307  num_changed++;
2308 
2309  if (num_changed>0)
2310  {
2311  pthread_t* threads= SG_MALLOC(pthread_t, num_threads-1);
2312  S_THREAD_PARAM_REACTIVATE_VANILLA* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_VANILLA, num_threads);
2313  int32_t step= num_changed/num_threads;
2314 
2315  // alloc num_threads many tmp buffers
2316  float64_t* tmp_lin=SG_MALLOC(float64_t, totdoc*num_threads);
2317  memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
2318  float64_t* tmp_aicache=SG_MALLOC(float64_t, totdoc*num_threads);
2319  memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
2320 
2321  int32_t thr;
2322  for (thr=0; thr<num_threads-1; thr++)
2323  {
2324  params[thr].kernel=kernel;
2325  params[thr].lin=&tmp_lin[thr*totdoc];
2326  params[thr].aicache=&tmp_aicache[thr*totdoc];
2327  params[thr].a=a;
2328  params[thr].a_old=a_old;
2329  params[thr].changed2dnum=changed2dnum;
2330  params[thr].inactive2dnum=inactive2dnum;
2331  params[thr].label=label;
2332  params[thr].start = thr*step;
2333  params[thr].end = (thr+1)*step;
2334  pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
2335  }
2336 
2337  params[thr].kernel=kernel;
2338  params[thr].lin=&tmp_lin[thr*totdoc];
2339  params[thr].aicache=&tmp_aicache[thr*totdoc];
2340  params[thr].a=a;
2341  params[thr].a_old=a_old;
2342  params[thr].changed2dnum=changed2dnum;
2343  params[thr].inactive2dnum=inactive2dnum;
2344  params[thr].label=label;
2345  params[thr].start = thr*step;
2346  params[thr].end = num_changed;
2347  reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);
2348 
2349  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
2350  lin[j]+=tmp_lin[totdoc*thr+j];
2351 
2352  for (thr=0; thr<num_threads-1; thr++)
2353  {
2354  pthread_join(threads[thr], NULL);
2355 
2356  //add up results
2357  for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
2358  lin[j]+=tmp_lin[totdoc*thr+j];
2359  }
2360 
2361  SG_FREE(tmp_lin);
2362  SG_FREE(tmp_aicache);
2363  SG_FREE(threads);
2364  SG_FREE(params);
2365  }
2366  }
2367 #endif
2368  }
2369  SG_FREE(changed);
2370  SG_FREE(changed2dnum);
2371  SG_FREE(inactive);
2372  SG_FREE(inactive2dnum);
2373  }
2374 
2375  (*maxdiff)=0;
2376  for (i=0;i<totdoc;i++) {
2377  shrink_state->inactive_since[i]=shrink_state->deactnum-1;
2378  if(!inconsistent[i]) {
2379  dist=(lin[i]-model->b)*(float64_t)label[i];
2380  target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
2381  ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2382  if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
2383  if((dist-target)>(*maxdiff)) /* largest violation */
2384  (*maxdiff)=dist-target;
2385  }
2386  else if((a[i]<ex_c) && (dist < target)) {
2387  if((target-dist)>(*maxdiff)) /* largest violation */
2388  (*maxdiff)=target-dist;
2389  }
2390  if((a[i]>(0+learn_parm->epsilon_a))
2391  && (a[i]<ex_c)) {
2392  shrink_state->active[i]=1; /* not at bound */
2393  }
2394  else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
2395  shrink_state->active[i]=1;
2396  }
2397  else if((a[i]>=ex_c)
2398  && (dist > (target-learn_parm->epsilon_shrink))) {
2399  shrink_state->active[i]=1;
2400  }
2401  else if(learn_parm->sharedslack) { /* make all active when sharedslack */
2402  shrink_state->active[i]=1;
2403  }
2404  }
2405  }
2406 
2407  if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */
2408  for (i=0;i<totdoc;i++) {
2409  (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
2410  }
2411  for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
2412  SG_FREE(shrink_state->a_history[t]);
2413  shrink_state->a_history[t]=0;
2414  }
2415  }
2416 }
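/* [Editor's note -- illustrative commentary, not part of the original
 * SVMLight sources.] The final sweep above re-checks the KKT conditions
 * for every example via
 *
 *     dist   = y_i*(lin[i] - b)
 *     target = -(eps[i] - y_i*c[i]) ,
 *
 * records the largest violation in *maxdiff (the caller's stopping
 * criterion), and keeps a variable active unless it sits at a bound with
 * at least epsilon_shrink slack, i.e. unless it could immediately be
 * shrunk again. With shared slacks, everything stays active. */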
2417 
2418 
2419 
2420 /* start the optimizer and return the optimal values */
2421 float64_t* CSVMLight::optimize_qp(
2422  QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
2423  int32_t& svm_maxqpsize)
2424 {
2425  register int32_t i, j, result;
2426  float64_t margin, obj_before, obj_after;
2427  float64_t sigdig, dist, epsilon_loqo;
2428  int32_t iter;
2429 
2430  if(!primal) { /* allocate memory at first call */
2431  primal=SG_MALLOC(float64_t, nx*3);
2432  dual=SG_MALLOC(float64_t, nx*2+1);
2433  }
2434 
2435  obj_before=0; /* calculate objective before optimization */
2436  for(i=0;i<qp->opt_n;i++) {
2437  obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
2438  obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
2439  for(j=0;j<i;j++) {
2440  obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
2441  }
2442  }
2443 
2444  result=STILL_RUNNING;
2445  qp->opt_ce0[0]*=(-1.0);
2446  /* Run pr_loqo. If a run fails, try again with parameters that lead */
2447  /* to a slower but more robust setting. */
2448  for(margin=init_margin,iter=init_iter;
2449  (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {
2450 
2452  sigdig=-log10(opt_precision);
2453 
2454  result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
2455  (float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
2456  (float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
2457  (float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
2458  (float64_t *)primal,(float64_t *)dual,
2459  (int32_t)(verbosity-2),
2460  (float64_t)sigdig,(int32_t)iter,
2461  (float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);
2462 
2463  if(CMath::is_nan(dual[0])) { /* check for choldc problem */
2464  if(verbosity>=2) {
2465  SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n")
2466  }
2467  if(init_margin<0.80) { /* become more conservative in general */
2468  init_margin=(4.0*margin+1.0)/5.0;
2469  }
2470  margin=(margin+1.0)/2.0;
2471  (opt_precision)*=10.0; /* reduce precision */
2472  if(verbosity>=2) {
2473  SG_SDEBUG("Reducing precision of PR_LOQO.\n")
2474  }
2475  }
2476  else if(result!=OPTIMAL_SOLUTION) {
2477  iter+=2000;
2478  init_iter+=10;
2479  (opt_precision)*=10.0; /* reduce precision */
2480  if(verbosity>=2) {
2481  SG_SDEBUG("Reducing precision of PR_LOQO due to (%ld).\n",result)
2482  }
2483  }
2484  }
2485 
2486  if(qp->opt_m) /* Thanks to Alex Smola for this hint */
2487  model_b=dual[0];
2488  else
2489  model_b=0;
2490 
2491  /* Check the precision of the alphas. If the results of the current optimization */
2492  /* violate the KT-Conditions, relax the epsilon on the bounds on the alphas. */
2493  epsilon_loqo=1E-10;
2494  for(i=0;i<qp->opt_n;i++) {
2495  dist=-model_b*qp->opt_ce[i];
2496  dist+=(qp->opt_g0[i]+1.0);
2497  for(j=0;j<i;j++) {
2498  dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
2499  }
2500  for(j=i;j<qp->opt_n;j++) {
2501  dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
2502  }
2503  /* SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]) */
2504  if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
2505  epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
2506  }
2507  else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
2508  epsilon_loqo=primal[i]*2.0;
2509  }
2510  }
2511 
2512  for(i=0;i<qp->opt_n;i++) { /* clip alphas to bounds */
2513  if(primal[i]<=(0+epsilon_loqo)) {
2514  primal[i]=0;
2515  }
2516  else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
2517  primal[i]=qp->opt_up[i];
2518  }
2519  }
2520 
2521  obj_after=0; /* calculate objective after optimization */
2522  for(i=0;i<qp->opt_n;i++) {
2523  obj_after+=(qp->opt_g0[i]*primal[i]);
2524  obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
2525  for(j=0;j<i;j++) {
2526  obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
2527  }
2528  }
2529 
2530  /* If the optimizer returned NaN values, reset and retry with a smaller */
2531  /* working set. */
2532  if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
2533  for(i=0;i<qp->opt_n;i++) {
2534  primal[i]=qp->opt_xinit[i];
2535  }
2536  model_b=0;
2537  if(svm_maxqpsize>2) {
2538  svm_maxqpsize--; /* decrease size of qp-subproblems */
2539  }
2540  }
2541 
2542  if(obj_after >= obj_before) { /* check whether there was progress */
2543  (opt_precision)/=100.0;
2544  precision_violations++;
2545  if(verbosity>=2) {
2546  SG_SDEBUG("Increasing Precision of PR_LOQO.\n")
2547  }
2548  }
2549 
2550  // TODO: add parameter for this (e.g. for MTL experiments)
2551  if(precision_violations > 5000) {
2552  (*epsilon_crit)*=10.0;
2553  precision_violations=0;
2554  SG_SINFO("Relaxing epsilon on KT-Conditions.\n")
2555  }
2556 
2557  (*threshold)=model_b;
2558 
2559  if(result!=OPTIMAL_SOLUTION) {
2560  SG_SERROR("PR_LOQO did not converge.\n")
2561  return(qp->opt_xinit);
2562  }
2563  else {
2564  return(primal);
2565  }
2566 }
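/* [Editor's note -- illustrative commentary, not part of the original
 * SVMLight sources.] optimize_qp() wraps pr_loqo in a robustness loop. The
 * subproblem objective
 *
 *     W(x) = g0'x + (1/2) x'G x
 *
 * is evaluated before and after solving; the double loops above sum the
 * diagonal once and each off-diagonal pair once, exploiting the symmetry
 * of G. On a choldc failure (NaN in dual[0]) the margin parameter is moved
 * towards 1 and the requested precision is relaxed by 10x; if the solution
 * itself is NaN the alphas are reset and the maximal working-set size is
 * reduced; if the objective did not improve, the precision is tightened by
 * 100x and a violation is counted, and after more than 5000 violations the
 * outer tolerance epsilon_crit is relaxed instead. */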
2567 
2568 
2569 #endif //USE_SVMLIGHT