SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
KernelMachine.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 2011-2012 Heiko Strathmann
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
#include <shogun/machine/KernelMachine.h>
#include <shogun/lib/Signal.h>
#include <shogun/base/Parallel.h>
#include <shogun/io/SGIO.h>

#include <shogun/kernel/Kernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/labels/Labels.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/RegressionLabels.h>
21 using namespace shogun;
22 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/* Per-thread work descriptor for the pthread-based apply paths
 * (apply_get_outputs() / apply_locked_get_output()); each worker fills
 * result[start..end) via kernel_machine->apply_one(). */
struct S_THREAD_PARAM_KERNEL_MACHINE
{
	/* machine whose apply_one() this worker evaluates (not owned) */
	CKernelMachine* kernel_machine;
	/* shared output buffer; this worker writes only slots [start, end) */
	float64_t* result;
	int32_t start;
	int32_t end;

	/* if non-null, start and end correspond to indices in this vector */
	index_t* indices;
	index_t indices_len;
	/* only the worker running in the main thread reports progress */
	bool verbose;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS
37 
39 {
40  init();
41 }
42 
45 {
46  init();
47 
48  int32_t num_sv=svs.vlen;
49  ASSERT(num_sv == alphas.vlen)
50  create_new_model(num_sv);
51  set_alphas(alphas);
54  set_bias(b);
55 }
56 
58 {
59  init();
60 
61  SGVector<float64_t> alphas = machine->get_alphas().clone();
62  SGVector<int32_t> svs = machine->get_support_vectors().clone();
63  float64_t bias = machine->get_bias();
64  CKernel* ker = machine->get_kernel();
65 
66  int32_t num_sv = svs.vlen;
67  create_new_model(num_sv);
68  set_alphas(alphas);
70  set_bias(bias);
71  set_kernel(ker);
72 }
73 
75 {
79 }
80 
82 {
83  SG_REF(k);
85  kernel=k;
86 }
87 
89 {
90  SG_REF(kernel);
91  return kernel;
92 }
93 
95 {
96  use_batch_computation=enable;
97 }
98 
100 {
101  return use_batch_computation;
102 }
103 
105 {
106  use_linadd=enable;
107 }
108 
110 {
111  return use_linadd;
112 }
113 
114 void CKernelMachine::set_bias_enabled(bool enable_bias)
115 {
116  use_bias=enable_bias;
117 }
118 
120 {
121  return use_bias;
122 }
123 
125 {
126  return m_bias;
127 }
128 
130 {
131  m_bias=bias;
132 }
133 
135 {
136  ASSERT(m_svs.vector && idx<m_svs.vlen)
137  return m_svs.vector[idx];
138 }
139 
141 {
142  if (!m_alpha.vector)
143  SG_ERROR("No alphas set\n")
144  if (idx>=m_alpha.vlen)
145  SG_ERROR("Alphas index (%d) out of range (%d)\n", idx, m_svs.vlen)
146  return m_alpha.vector[idx];
147 }
148 
149 bool CKernelMachine::set_support_vector(int32_t idx, int32_t val)
150 {
151  if (m_svs.vector && idx<m_svs.vlen)
152  m_svs.vector[idx]=val;
153  else
154  return false;
155 
156  return true;
157 }
158 
159 bool CKernelMachine::set_alpha(int32_t idx, float64_t val)
160 {
161  if (m_alpha.vector && idx<m_alpha.vlen)
162  m_alpha.vector[idx]=val;
163  else
164  return false;
165 
166  return true;
167 }
168 
170 {
171  return m_svs.vlen;
172 }
173 
175 {
176  m_alpha = alphas;
177 }
178 
180 {
181  m_svs = svs;
182 }
183 
185 {
186  return m_svs;
187 }
188 
190 {
191  return m_alpha;
192 }
193 
195 {
198 
199  m_bias=0;
200 
201  if (num>0)
202  {
204  m_svs= SGVector<int32_t>(num);
205  return (m_alpha.vector!=NULL && m_svs.vector!=NULL);
206  }
207  else
208  return true;
209 }
210 
212 {
213  int32_t num_sv=get_num_support_vectors();
214 
215  if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
216  {
217  int32_t * sv_idx = SG_MALLOC(int32_t, num_sv);
218  float64_t* sv_weight = SG_MALLOC(float64_t, num_sv);
219 
220  for(int32_t i=0; i<num_sv; i++)
221  {
222  sv_idx[i] = get_support_vector(i) ;
223  sv_weight[i] = get_alpha(i) ;
224  }
225 
226  bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight) ;
227 
228  SG_FREE(sv_idx);
229  SG_FREE(sv_weight);
230 
231  if (!ret)
232  SG_ERROR("initialization of kernel optimization failed\n")
233 
234  return ret;
235  }
236  else
237  SG_ERROR("initialization of kernel optimization failed\n")
238 
239  return false;
240 }
241 
243 {
244  SGVector<float64_t> outputs = apply_get_outputs(data);
245  return new CRegressionLabels(outputs);
246 }
247 
249 {
250  SGVector<float64_t> outputs = apply_get_outputs(data);
251  return new CBinaryLabels(outputs);
252 }
253 
255 {
256  SG_DEBUG("entering %s::apply_get_outputs(%s at %p)\n",
257  get_name(), data ? data->get_name() : "NULL", data);
258 
259  REQUIRE(kernel, "%s::apply_get_outputs(): No kernel assigned!\n")
260 
261  if (!kernel->get_num_vec_lhs())
262  {
263  SG_ERROR("%s: No vectors on left hand side (%s). This is probably due to"
264  " an implementation error in %s, where it was forgotten to set "
265  "the data (m_svs) indices\n", get_name(),
266  data->get_name());
267  }
268 
269  if (data)
270  {
271  CFeatures* lhs=kernel->get_lhs();
272  REQUIRE(lhs, "%s::apply_get_outputs(): No left hand side specified\n",
273  get_name());
274  kernel->init(lhs, data);
275  SG_UNREF(lhs);
276  }
277 
278  /* using the features to get num vectors is far safer than using the kernel
279  * since SHOGUNs kernel num_rhs/num_lhs is buggy (CombinedKernel for ex.)
280  * Might be worth investigating why
281  * kernel->get_num_rhs() != rhs->get_num_vectors()
282  * However, the below version works
283  * TODO Heiko Strathmann
284  */
285  CFeatures* rhs=kernel->get_rhs();
286  int32_t num_vectors=rhs ? rhs->get_num_vectors() : kernel->get_num_vec_rhs();
287  SG_UNREF(rhs)
288 
289  SGVector<float64_t> output(num_vectors);
290 
291  if (kernel->get_num_vec_rhs()>0)
292  {
293  SG_DEBUG("computing output on %d test examples\n", num_vectors)
294 
296 
297  if (io->get_show_progress())
298  io->enable_progress();
299  else
300  io->disable_progress();
301 
304  {
305  output.zero();
306  SG_DEBUG("Batch evaluation enabled\n")
307  if (get_num_support_vectors()>0)
308  {
309  int32_t* sv_idx=SG_MALLOC(int32_t, get_num_support_vectors());
310  float64_t* sv_weight=SG_MALLOC(float64_t, get_num_support_vectors());
311  int32_t* idx=SG_MALLOC(int32_t, num_vectors);
312 
313  //compute output for all vectors v[0]...v[num_vectors-1]
314  for (int32_t i=0; i<num_vectors; i++)
315  idx[i]=i;
316 
317  for (int32_t i=0; i<get_num_support_vectors(); i++)
318  {
319  sv_idx[i] = get_support_vector(i) ;
320  sv_weight[i] = get_alpha(i) ;
321  }
322 
323  kernel->compute_batch(num_vectors, idx,
324  output.vector, get_num_support_vectors(), sv_idx, sv_weight);
325  SG_FREE(sv_idx);
326  SG_FREE(sv_weight);
327  SG_FREE(idx);
328  }
329 
330  for (int32_t i=0; i<num_vectors; i++)
331  output[i] = get_bias() + output[i];
332 
333  }
334  else
335  {
336  // TODO: port to use OpenMP backend instead of pthread
337 #ifdef HAVE_PTHREAD
338  int32_t num_threads=parallel->get_num_threads();
339 #else
340  int32_t num_threads=1;
341 #endif
342  ASSERT(num_threads>0)
343 
344  if (num_threads < 2)
345  {
346  S_THREAD_PARAM_KERNEL_MACHINE params;
347  params.kernel_machine=this;
348  params.result = output.vector;
349  params.start=0;
350  params.end=num_vectors;
351  params.verbose=true;
352  params.indices = NULL;
353  params.indices_len = 0;
354  apply_helper((void*) &params);
355  }
356 #ifdef HAVE_PTHREAD
357  else
358  {
359  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
360  S_THREAD_PARAM_KERNEL_MACHINE* params = SG_MALLOC(S_THREAD_PARAM_KERNEL_MACHINE, num_threads);
361  int32_t step= num_vectors/num_threads;
362 
363  int32_t t;
364 
365  for (t=0; t<num_threads-1; t++)
366  {
367  params[t].kernel_machine = this;
368  params[t].result = output.vector;
369  params[t].start = t*step;
370  params[t].end = (t+1)*step;
371  params[t].verbose = false;
372  params[t].indices = NULL;
373  params[t].indices_len = 0;
374  pthread_create(&threads[t], NULL,
375  CKernelMachine::apply_helper, (void*)&params[t]);
376  }
377 
378  params[t].kernel_machine = this;
379  params[t].result = output.vector;
380  params[t].start = t*step;
381  params[t].end = num_vectors;
382  params[t].verbose = true;
383  params[t].indices = NULL;
384  params[t].indices_len = 0;
385  apply_helper((void*) &params[t]);
386 
387  for (t=0; t<num_threads-1; t++)
388  pthread_join(threads[t], NULL);
389 
390  SG_FREE(params);
391  SG_FREE(threads);
392  }
393 #endif
394  }
395 
396 #ifndef WIN32
398  SG_INFO("prematurely stopped. \n")
399  else
400 #endif
401  SG_DONE()
402  }
403 
404  SG_DEBUG("leaving %s::apply_get_outputs(%s at %p)\n",
405  get_name(), data ? data->get_name() : "NULL", data);
406 
407  return output;
408 }
409 
411 {
412  ASSERT(kernel)
413 
415  {
416  float64_t score = kernel->compute_optimized(num);
417  return score+get_bias();
418  }
419  else
420  {
421  float64_t score=0;
422  for(int32_t i=0; i<get_num_support_vectors(); i++)
423  score+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);
424 
425  return score+get_bias();
426  }
427 }
428 
430 {
431  S_THREAD_PARAM_KERNEL_MACHINE* params = (S_THREAD_PARAM_KERNEL_MACHINE*) p;
432  float64_t* result = params->result;
433  CKernelMachine* kernel_machine = params->kernel_machine;
434 
435 #ifdef WIN32
436  for (int32_t vec=params->start; vec<params->end; vec++)
437 #else
438  for (int32_t vec=params->start; vec<params->end &&
440 #endif
441  {
442  if (params->verbose)
443  {
444  int32_t num_vectors=params->end - params->start;
445  int32_t v=vec-params->start;
446  if ( (v% (num_vectors/100+1))== 0)
447  SG_SPROGRESS(v, 0.0, num_vectors-1)
448  }
449 
450  /* eventually use index mapping if exists */
451  index_t idx=params->indices ? params->indices[vec] : vec;
452  result[vec] = kernel_machine->apply_one(idx);
453  }
454 
455  return NULL;
456 }
457 
459 {
460  if (!kernel)
461  SG_ERROR("kernel is needed to store SV features.\n")
462 
463  CFeatures* lhs=kernel->get_lhs();
464  CFeatures* rhs=kernel->get_rhs();
465 
466  if (!lhs)
467  SG_ERROR("kernel lhs is needed to store SV features.\n")
468 
469  /* copy sv feature data */
470  CFeatures* sv_features=lhs->copy_subset(m_svs);
471  SG_UNREF(lhs);
472 
473  /* set new lhs to kernel */
474  kernel->init(sv_features, rhs);
475 
476  /* unref rhs */
477  SG_UNREF(rhs);
478 
479  /* was SG_REF'ed by copy_subset */
480  SG_UNREF(sv_features);
481 
482  /* now sv indices are just the identity */
483  m_svs.range_fill();
484 
485 }
486 
488 {
489  SG_DEBUG("entering %s::train_locked()\n", get_name())
490  if (!is_data_locked())
491  SG_ERROR("CKernelMachine::train_locked() call data_lock() before!\n")
492 
493  /* this is asusmed here */
495 
496  /* since its not easily possible to controll the row subsets of the custom
497  * kernel from outside, we enforce that there is only one row subset by
498  * removing all of them. Otherwise, they would add up in the stack until
499  * an error occurs */
501 
502  /* set custom kernel subset of data to train on */
505 
506  /* set corresponding labels subset */
507  m_labels->add_subset(indices);
508 
509  /* dont do train because model should not be stored (no acutal features)
510  * and train does data_unlock */
511  bool result=train_machine();
512 
513  /* remove last col subset of custom kernel */
515 
516  /* remove label subset after training */
518 
519  SG_DEBUG("leaving %s::train_locked()\n", get_name())
520  return result;
521 }
522 
524 {
525  SGVector<float64_t> outputs = apply_locked_get_output(indices);
526  return new CBinaryLabels(outputs);
527 }
528 
530  SGVector<index_t> indices)
531 {
532  SGVector<float64_t> outputs = apply_locked_get_output(indices);
533  return new CRegressionLabels(outputs);
534 }
535 
537  SGVector<index_t> indices)
538 {
539  if (!is_data_locked())
540  SG_ERROR("CKernelMachine::apply_locked() call data_lock() before!\n")
541 
542  /* we are working on a custom kernel here */
544 
545  int32_t num_inds=indices.vlen;
546  SGVector<float64_t> output(num_inds);
547 
549 
550  if (io->get_show_progress())
551  io->enable_progress();
552  else
553  io->disable_progress();
554 
555  /* custom kernel never has batch evaluation property so dont do this here */
556  // TODO: port to use OpenMP backend instead of pthread
557 #ifdef HAVE_PTHREAD
558  int32_t num_threads=parallel->get_num_threads();
559 #else
560  int32_t num_threads=1;
561 #endif
562  ASSERT(num_threads>0)
563 
564  if (num_threads<2)
565  {
566  S_THREAD_PARAM_KERNEL_MACHINE params;
567  params.kernel_machine=this;
568  params.result=output.vector;
569 
570  /* use the parameter index vector */
571  params.start=0;
572  params.end=num_inds;
573  params.indices=indices.vector;
574  params.indices_len=indices.vlen;
575 
576  params.verbose=true;
577  apply_helper((void*) &params);
578  }
579 #ifdef HAVE_PTHREAD
580  else
581  {
582  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
583  S_THREAD_PARAM_KERNEL_MACHINE* params=SG_MALLOC(S_THREAD_PARAM_KERNEL_MACHINE, num_threads);
584  int32_t step= num_inds/num_threads;
585 
586  int32_t t;
587  for (t=0; t<num_threads-1; t++)
588  {
589  params[t].kernel_machine=this;
590  params[t].result=output.vector;
591 
592  /* use the parameter index vector */
593  params[t].start=t*step;
594  params[t].end=(t+1)*step;
595  params[t].indices=indices.vector;
596  params[t].indices_len=indices.vlen;
597 
598  params[t].verbose=false;
599  pthread_create(&threads[t], NULL, CKernelMachine::apply_helper,
600  (void*)&params[t]);
601  }
602 
603  params[t].kernel_machine=this;
604  params[t].result=output.vector;
605 
606  /* use the parameter index vector */
607  params[t].start=t*step;
608  params[t].end=num_inds;
609  params[t].indices=indices.vector;
610  params[t].indices_len=indices.vlen;
611 
612  params[t].verbose=true;
613  apply_helper((void*) &params[t]);
614 
615  for (t=0; t<num_threads-1; t++)
616  pthread_join(threads[t], NULL);
617 
618  SG_FREE(params);
619  SG_FREE(threads);
620  }
621 #endif
622 
623 #ifndef WIN32
625  SG_INFO("prematurely stopped.\n")
626  else
627 #endif
628  SG_DONE()
629 
630  return output;
631 }
632 
634 {
635  if ( !kernel )
636  SG_ERROR("The kernel is not initialized\n")
638  SG_ERROR("Locking is not supported (yet) with combined kernel. Please disable it in cross validation")
639 
640  /* init kernel with data */
641  kernel->init(features, features);
642 
643  /* backup reference to old kernel */
647 
648  /* unref possible old custom kernel */
650 
651  /* create custom kernel matrix from current kernel */
654 
655  /* replace kernel by custom kernel */
656  SG_UNREF(kernel);
658  SG_REF(kernel);
659 
660  /* dont forget to call superclass method */
661  CMachine::data_lock(labs, features);
662 }
663 
665 {
667  m_custom_kernel=NULL;
668 
669  /* restore original kernel, possibly delete created one */
670  if (m_kernel_backup)
671  {
672  /* check if kernel was created in train_locked */
673  if (kernel!=m_kernel_backup)
674  SG_UNREF(kernel);
675 
677  m_kernel_backup=NULL;
678  }
679 
680  /* dont forget to call superclass method */
682 }
683 
684 void CKernelMachine::init()
685 {
686  m_bias=0.0;
687  kernel=NULL;
688  m_custom_kernel=NULL;
689  m_kernel_backup=NULL;
691  use_linadd=true;
692  use_bias=true;
693 
694  SG_ADD((CSGObject**) &kernel, "kernel", "", MS_AVAILABLE);
695  SG_ADD((CSGObject**) &m_custom_kernel, "custom_kernel", "Custom kernel for"
696  " data lock", MS_NOT_AVAILABLE);
697  SG_ADD((CSGObject**) &m_kernel_backup, "kernel_backup",
698  "Kernel backup for data lock", MS_NOT_AVAILABLE);
699  SG_ADD(&use_batch_computation, "use_batch_computation",
700  "Batch computation is enabled.", MS_NOT_AVAILABLE);
701  SG_ADD(&use_linadd, "use_linadd", "Linadd is enabled.", MS_NOT_AVAILABLE);
702  SG_ADD(&use_bias, "use_bias", "Bias shall be used.", MS_NOT_AVAILABLE);
703  SG_ADD(&m_bias, "m_bias", "Bias term.", MS_NOT_AVAILABLE);
704  SG_ADD(&m_alpha, "m_alpha", "Array of coefficients alpha.",
706  SG_ADD(&m_svs, "m_svs", "Number of ``support vectors''.", MS_NOT_AVAILABLE);
707 }
708 
710 {
711  return true;
712 }
713 
virtual float64_t apply_one(int32_t num)
virtual const char * get_name() const =0
SGVector< float64_t > apply_get_outputs(CFeatures *data)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:96
SGVector< int32_t > m_svs
void set_bias_enabled(bool enable_bias)
void range_fill(T start=0)
Definition: SGVector.cpp:208
#define SG_INFO(...)
Definition: SGIO.h:117
#define SG_DONE()
Definition: SGIO.h:156
virtual CBinaryLabels * apply_locked_binary(SGVector< index_t > indices)
Real Labels are real-valued labels.
int32_t get_num_threads() const
Definition: Parallel.cpp:97
int32_t index_t
Definition: common.h:72
virtual void add_row_subset(SGVector< index_t > subset)
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
The Custom Kernel allows for custom user provided kernel matrices.
Definition: CustomKernel.h:36
virtual const char * get_name() const
Definition: KernelMachine.h:80
virtual CRegressionLabels * apply_regression(CFeatures *data=NULL)
SGVector< int32_t > get_support_vectors()
virtual int32_t get_num_vectors() const =0
CCustomKernel * m_custom_kernel
CLabels * m_labels
Definition: Machine.h:365
CFeatures * get_rhs()
#define SG_ERROR(...)
Definition: SGIO.h:128
#define REQUIRE(x,...)
Definition: SGIO.h:205
float64_t kernel(int32_t idx_a, int32_t idx_b)
static void * apply_helper(void *p)
virtual bool train_machine(CFeatures *data=NULL)
Definition: Machine.h:322
A generic KernelMachine interface.
Definition: KernelMachine.h:51
Parallel * parallel
Definition: SGObject.h:561
virtual int32_t get_num_vec_lhs()
virtual void remove_all_row_subsets()
#define SG_REF(x)
Definition: SGObject.h:52
A generic learning machine interface.
Definition: Machine.h:143
bool get_is_initialized()
void set_support_vectors(SGVector< int32_t > svs)
virtual bool train_locked(SGVector< index_t > indices)
SGVector< float64_t > m_alpha
virtual void remove_col_subset()
bool has_property(EKernelProperty p)
virtual void add_col_subset(SGVector< index_t > subset)
index_t vlen
Definition: SGVector.h:545
virtual void store_model_features()
#define ASSERT(x)
Definition: SGIO.h:200
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:125
void set_bias(float64_t bias)
void set_batch_computation_enabled(bool enable)
static void clear_cancel()
Definition: Signal.cpp:126
virtual SGVector< float64_t > apply_locked_get_output(SGVector< index_t > indices)
void disable_progress()
Definition: SGIO.h:419
double float64_t
Definition: common.h:60
bool set_alpha(int32_t idx, float64_t val)
virtual void data_unlock()
Definition: Machine.cpp:143
virtual void data_unlock()
virtual void data_lock(CLabels *labs, CFeatures *features)
Definition: Machine.cpp:112
virtual void remove_subset()
Definition: Labels.cpp:49
virtual float64_t compute_optimized(int32_t vector_idx)
Definition: Kernel.cpp:816
float64_t get_alpha(int32_t idx)
virtual void add_subset(SGVector< index_t > subset)
Definition: Labels.cpp:39
virtual bool supports_locking() const
bool set_support_vector(int32_t idx, int32_t val)
int32_t get_support_vector(int32_t idx)
static bool cancel_computations()
Definition: Signal.h:111
virtual int32_t get_num_vec_rhs()
bool get_show_progress() const
Definition: SGIO.h:279
SGVector< float64_t > get_alphas()
#define SG_UNREF(x)
Definition: SGObject.h:53
#define SG_DEBUG(...)
Definition: SGIO.h:106
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual void compute_batch(int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0)
Definition: Kernel.cpp:822
virtual bool init_optimization(int32_t count, int32_t *IDX, float64_t *weights)
Definition: Kernel.cpp:803
virtual CFeatures * copy_subset(SGVector< index_t > indices)
Definition: Features.cpp:340
virtual CRegressionLabels * apply_locked_regression(SGVector< index_t > indices)
void set_alphas(SGVector< float64_t > alphas)
The class Features is the base class of all feature objects.
Definition: Features.h:68
SGVector< T > clone() const
Definition: SGVector.cpp:247
#define SG_SPROGRESS(...)
Definition: SGIO.h:182
void set_linadd_enabled(bool enable)
The Kernel base class.
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
void set_kernel(CKernel *k)
#define SG_ADD(...)
Definition: SGObject.h:94
bool is_data_locked() const
Definition: Machine.h:300
virtual CBinaryLabels * apply_binary(CFeatures *data=NULL)
bool create_new_model(int32_t num)
CFeatures * get_lhs()
virtual void data_lock(CLabels *labs, CFeatures *features=NULL)
void enable_progress()
Definition: SGIO.h:409

SHOGUN Machine Learning Toolbox - Documentation