SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KernelMachine.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 2011-2012 Heiko Strathmann
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/lib/Signal.h>
15 #include <shogun/io/SGIO.h>
17 
18 #include <shogun/kernel/Kernel.h>
20 #include <shogun/labels/Labels.h>
21 
22 using namespace shogun;
23 
24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
25 struct S_THREAD_PARAM_KERNEL_MACHINE
26 {
27  CKernelMachine* kernel_machine;
28  float64_t* result;
29  int32_t start;
30  int32_t end;
31 
32  /* if non-null, start and end correspond to indices in this vector */
33  index_t* indices;
34  index_t indices_len;
35  bool verbose;
36 };
37 #endif // DOXYGEN_SHOULD_SKIP_THIS
38 
40 {
41  init();
42 }
43 
46 {
47  init();
48 
49  int32_t num_sv=svs.vlen;
50  ASSERT(num_sv == alphas.vlen)
51  create_new_model(num_sv);
52  set_alphas(alphas);
55  set_bias(b);
56 }
57 
59 {
60  init();
61 
62  SGVector<float64_t> alphas = machine->get_alphas().clone();
63  SGVector<int32_t> svs = machine->get_support_vectors().clone();
64  float64_t bias = machine->get_bias();
65  CKernel* ker = machine->get_kernel();
66 
67  int32_t num_sv = svs.vlen;
68  create_new_model(num_sv);
69  set_alphas(alphas);
71  set_bias(bias);
72  set_kernel(ker);
73 }
74 
76 {
80 }
81 
83 {
84  SG_REF(k);
86  kernel=k;
87 }
88 
90 {
91  SG_REF(kernel);
92  return kernel;
93 }
94 
96 {
97  use_batch_computation=enable;
98 }
99 
101 {
102  return use_batch_computation;
103 }
104 
106 {
107  use_linadd=enable;
108 }
109 
111 {
112  return use_linadd;
113 }
114 
115 void CKernelMachine::set_bias_enabled(bool enable_bias)
116 {
117  use_bias=enable_bias;
118 }
119 
121 {
122  return use_bias;
123 }
124 
126 {
127  return m_bias;
128 }
129 
131 {
132  m_bias=bias;
133 }
134 
136 {
137  ASSERT(m_svs.vector && idx<m_svs.vlen)
138  return m_svs.vector[idx];
139 }
140 
142 {
143  if (!m_alpha.vector)
144  SG_ERROR("No alphas set\n")
145  if (idx>=m_alpha.vlen)
146  SG_ERROR("Alphas index (%d) out of range (%d)\n", idx, m_svs.vlen)
147  return m_alpha.vector[idx];
148 }
149 
150 bool CKernelMachine::set_support_vector(int32_t idx, int32_t val)
151 {
152  if (m_svs.vector && idx<m_svs.vlen)
153  m_svs.vector[idx]=val;
154  else
155  return false;
156 
157  return true;
158 }
159 
160 bool CKernelMachine::set_alpha(int32_t idx, float64_t val)
161 {
162  if (m_alpha.vector && idx<m_alpha.vlen)
163  m_alpha.vector[idx]=val;
164  else
165  return false;
166 
167  return true;
168 }
169 
171 {
172  return m_svs.vlen;
173 }
174 
176 {
177  m_alpha = alphas;
178 }
179 
181 {
182  m_svs = svs;
183 }
184 
186 {
187  return m_svs;
188 }
189 
191 {
192  return m_alpha;
193 }
194 
196 {
199 
200  m_bias=0;
201 
202  if (num>0)
203  {
205  m_svs= SGVector<int32_t>(num);
206  return (m_alpha.vector!=NULL && m_svs.vector!=NULL);
207  }
208  else
209  return true;
210 }
211 
213 {
214  int32_t num_sv=get_num_support_vectors();
215 
216  if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
217  {
218  int32_t * sv_idx = SG_MALLOC(int32_t, num_sv);
219  float64_t* sv_weight = SG_MALLOC(float64_t, num_sv);
220 
221  for(int32_t i=0; i<num_sv; i++)
222  {
223  sv_idx[i] = get_support_vector(i) ;
224  sv_weight[i] = get_alpha(i) ;
225  }
226 
227  bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight) ;
228 
229  SG_FREE(sv_idx);
230  SG_FREE(sv_weight);
231 
232  if (!ret)
233  SG_ERROR("initialization of kernel optimization failed\n")
234 
235  return ret;
236  }
237  else
238  SG_ERROR("initialization of kernel optimization failed\n")
239 
240  return false;
241 }
242 
244 {
245  SGVector<float64_t> outputs = apply_get_outputs(data);
246  return new CRegressionLabels(outputs);
247 }
248 
250 {
251  SGVector<float64_t> outputs = apply_get_outputs(data);
252  return new CBinaryLabels(outputs);
253 }
254 
256 {
257  SG_DEBUG("entering %s::apply_get_outputs(%s at %p)\n",
258  get_name(), data ? data->get_name() : "NULL", data);
259 
260  REQUIRE(kernel, "%s::apply_get_outputs(): No kernel assigned!\n")
261 
262  if (!kernel->get_num_vec_lhs())
263  {
264  SG_ERROR("%s: No vectors on left hand side (%s). This is probably due to"
265  " an implementation error in %s, where it was forgotten to set "
266  "the data (m_svs) indices\n", get_name(),
267  data->get_name());
268  }
269 
270  if (data)
271  {
272  CFeatures* lhs=kernel->get_lhs();
273  REQUIRE(lhs, "%s::apply_get_outputs(): No left hand side specified\n",
274  get_name());
275  kernel->init(lhs, data);
276  SG_UNREF(lhs);
277  }
278 
279  /* using the features to get num vectors is far safer than using the kernel
280  * since SHOGUNs kernel num_rhs/num_lhs is buggy (CombinedKernel for ex.)
281  * Might be worth investigating why
282  * kernel->get_num_rhs() != rhs->get_num_vectors()
283  * However, the below version works
284  * TODO Heiko Strathmann
285  */
286  CFeatures* rhs=kernel->get_rhs();
287  int32_t num_vectors=rhs ? rhs->get_num_vectors() : kernel->get_num_vec_rhs();
288  SG_UNREF(rhs)
289 
290  SGVector<float64_t> output(num_vectors);
291 
292  if (kernel->get_num_vec_rhs()>0)
293  {
294  SG_DEBUG("computing output on %d test examples\n", num_vectors)
295 
297 
298  if (io->get_show_progress())
299  io->enable_progress();
300  else
301  io->disable_progress();
302 
305  {
306  output.zero();
307  SG_DEBUG("Batch evaluation enabled\n")
308  if (get_num_support_vectors()>0)
309  {
310  int32_t* sv_idx=SG_MALLOC(int32_t, get_num_support_vectors());
311  float64_t* sv_weight=SG_MALLOC(float64_t, get_num_support_vectors());
312  int32_t* idx=SG_MALLOC(int32_t, num_vectors);
313 
314  //compute output for all vectors v[0]...v[num_vectors-1]
315  for (int32_t i=0; i<num_vectors; i++)
316  idx[i]=i;
317 
318  for (int32_t i=0; i<get_num_support_vectors(); i++)
319  {
320  sv_idx[i] = get_support_vector(i) ;
321  sv_weight[i] = get_alpha(i) ;
322  }
323 
324  kernel->compute_batch(num_vectors, idx,
325  output.vector, get_num_support_vectors(), sv_idx, sv_weight);
326  SG_FREE(sv_idx);
327  SG_FREE(sv_weight);
328  SG_FREE(idx);
329  }
330 
331  for (int32_t i=0; i<num_vectors; i++)
332  output[i] = get_bias() + output[i];
333 
334  }
335  else
336  {
337  int32_t num_threads=parallel->get_num_threads();
338  ASSERT(num_threads>0)
339 
340  if (num_threads < 2)
341  {
342  S_THREAD_PARAM_KERNEL_MACHINE params;
343  params.kernel_machine=this;
344  params.result = output.vector;
345  params.start=0;
346  params.end=num_vectors;
347  params.verbose=true;
348  params.indices = NULL;
349  params.indices_len = 0;
350  apply_helper((void*) &params);
351  }
352 #ifdef HAVE_PTHREAD
353  else
354  {
355  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
356  S_THREAD_PARAM_KERNEL_MACHINE* params = SG_MALLOC(S_THREAD_PARAM_KERNEL_MACHINE, num_threads);
357  int32_t step= num_vectors/num_threads;
358 
359  int32_t t;
360 
361  for (t=0; t<num_threads-1; t++)
362  {
363  params[t].kernel_machine = this;
364  params[t].result = output.vector;
365  params[t].start = t*step;
366  params[t].end = (t+1)*step;
367  params[t].verbose = false;
368  params[t].indices = NULL;
369  params[t].indices_len = 0;
370  pthread_create(&threads[t], NULL,
371  CKernelMachine::apply_helper, (void*)&params[t]);
372  }
373 
374  params[t].kernel_machine = this;
375  params[t].result = output.vector;
376  params[t].start = t*step;
377  params[t].end = num_vectors;
378  params[t].verbose = true;
379  params[t].indices = NULL;
380  params[t].indices_len = 0;
381  apply_helper((void*) &params[t]);
382 
383  for (t=0; t<num_threads-1; t++)
384  pthread_join(threads[t], NULL);
385 
386  SG_FREE(params);
387  SG_FREE(threads);
388  }
389 #endif
390  }
391 
392 #ifndef WIN32
394  SG_INFO("prematurely stopped. \n")
395  else
396 #endif
397  SG_DONE()
398  }
399 
400  SG_DEBUG("leaving %s::apply_get_outputs(%s at %p)\n",
401  get_name(), data ? data->get_name() : "NULL", data);
402 
403  return output;
404 }
405 
407 {
408  ASSERT(kernel)
409 
411  {
412  float64_t score = kernel->compute_optimized(num);
413  return score+get_bias();
414  }
415  else
416  {
417  float64_t score=0;
418  for(int32_t i=0; i<get_num_support_vectors(); i++)
419  score+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);
420 
421  return score+get_bias();
422  }
423 }
424 
426 {
427  S_THREAD_PARAM_KERNEL_MACHINE* params = (S_THREAD_PARAM_KERNEL_MACHINE*) p;
428  float64_t* result = params->result;
429  CKernelMachine* kernel_machine = params->kernel_machine;
430 
431 #ifdef WIN32
432  for (int32_t vec=params->start; vec<params->end; vec++)
433 #else
434  for (int32_t vec=params->start; vec<params->end &&
436 #endif
437  {
438  if (params->verbose)
439  {
440  int32_t num_vectors=params->end - params->start;
441  int32_t v=vec-params->start;
442  if ( (v% (num_vectors/100+1))== 0)
443  SG_SPROGRESS(v, 0.0, num_vectors-1)
444  }
445 
446  /* eventually use index mapping if exists */
447  index_t idx=params->indices ? params->indices[vec] : vec;
448  result[vec] = kernel_machine->apply_one(idx);
449  }
450 
451  return NULL;
452 }
453 
455 {
456  if (!kernel)
457  SG_ERROR("kernel is needed to store SV features.\n")
458 
459  CFeatures* lhs=kernel->get_lhs();
460  CFeatures* rhs=kernel->get_rhs();
461 
462  if (!lhs)
463  SG_ERROR("kernel lhs is needed to store SV features.\n")
464 
465  /* copy sv feature data */
466  CFeatures* sv_features=lhs->copy_subset(m_svs);
467  SG_UNREF(lhs);
468 
469  /* set new lhs to kernel */
470  kernel->init(sv_features, rhs);
471 
472  /* unref rhs */
473  SG_UNREF(rhs);
474 
475  /* was SG_REF'ed by copy_subset */
476  SG_UNREF(sv_features);
477 
478  /* now sv indices are just the identity */
479  m_svs.range_fill();
480 
481 }
482 
484 {
485  SG_DEBUG("entering %s::train_locked()\n", get_name())
486  if (!is_data_locked())
487  SG_ERROR("CKernelMachine::train_locked() call data_lock() before!\n")
488 
489  /* this is asusmed here */
491 
492  /* since its not easily possible to controll the row subsets of the custom
493  * kernel from outside, we enforce that there is only one row subset by
494  * removing all of them. Otherwise, they would add up in the stack until
495  * an error occurs */
497 
498  /* set custom kernel subset of data to train on */
501 
502  /* set corresponding labels subset */
503  m_labels->add_subset(indices);
504 
505  /* dont do train because model should not be stored (no acutal features)
506  * and train does data_unlock */
507  bool result=train_machine();
508 
509  /* remove last col subset of custom kernel */
511 
512  /* remove label subset after training */
514 
515  SG_DEBUG("leaving %s::train_locked()\n", get_name())
516  return result;
517 }
518 
520 {
521  SGVector<float64_t> outputs = apply_locked_get_output(indices);
522  return new CBinaryLabels(outputs);
523 }
524 
526  SGVector<index_t> indices)
527 {
528  SGVector<float64_t> outputs = apply_locked_get_output(indices);
529  return new CRegressionLabels(outputs);
530 }
531 
533  SGVector<index_t> indices)
534 {
535  if (!is_data_locked())
536  SG_ERROR("CKernelMachine::apply_locked() call data_lock() before!\n")
537 
538  /* we are working on a custom kernel here */
540 
541  int32_t num_inds=indices.vlen;
542  SGVector<float64_t> output(num_inds);
543 
545 
546  if (io->get_show_progress())
547  io->enable_progress();
548  else
549  io->disable_progress();
550 
551  /* custom kernel never has batch evaluation property so dont do this here */
552  int32_t num_threads=parallel->get_num_threads();
553  ASSERT(num_threads>0)
554 
555  if (num_threads<2)
556  {
557  S_THREAD_PARAM_KERNEL_MACHINE params;
558  params.kernel_machine=this;
559  params.result=output.vector;
560 
561  /* use the parameter index vector */
562  params.start=0;
563  params.end=num_inds;
564  params.indices=indices.vector;
565  params.indices_len=indices.vlen;
566 
567  params.verbose=true;
568  apply_helper((void*) &params);
569  }
570 #ifdef HAVE_PTHREAD
571  else
572  {
573  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
574  S_THREAD_PARAM_KERNEL_MACHINE* params=SG_MALLOC(S_THREAD_PARAM_KERNEL_MACHINE, num_threads);
575  int32_t step= num_inds/num_threads;
576 
577  int32_t t;
578  for (t=0; t<num_threads-1; t++)
579  {
580  params[t].kernel_machine=this;
581  params[t].result=output.vector;
582 
583  /* use the parameter index vector */
584  params[t].start=t*step;
585  params[t].end=(t+1)*step;
586  params[t].indices=indices.vector;
587  params[t].indices_len=indices.vlen;
588 
589  params[t].verbose=false;
590  pthread_create(&threads[t], NULL, CKernelMachine::apply_helper,
591  (void*)&params[t]);
592  }
593 
594  params[t].kernel_machine=this;
595  params[t].result=output.vector;
596 
597  /* use the parameter index vector */
598  params[t].start=t*step;
599  params[t].end=num_inds;
600  params[t].indices=indices.vector;
601  params[t].indices_len=indices.vlen;
602 
603  params[t].verbose=true;
604  apply_helper((void*) &params[t]);
605 
606  for (t=0; t<num_threads-1; t++)
607  pthread_join(threads[t], NULL);
608 
609  SG_FREE(params);
610  SG_FREE(threads);
611  }
612 #endif
613 
614 #ifndef WIN32
616  SG_INFO("prematurely stopped.\n")
617  else
618 #endif
619  SG_DONE()
620 
621  return output;
622 }
623 
625 {
626  if ( !kernel )
627  SG_ERROR("The kernel is not initialized\n")
629  SG_ERROR("Locking is not supported (yet) with combined kernel. Please disable it in cross validation")
630 
631  /* init kernel with data */
632  kernel->init(features, features);
633 
634  /* backup reference to old kernel */
638 
639  /* unref possible old custom kernel */
641 
642  /* create custom kernel matrix from current kernel */
645 
646  /* replace kernel by custom kernel */
647  SG_UNREF(kernel);
649  SG_REF(kernel);
650 
651  /* dont forget to call superclass method */
652  CMachine::data_lock(labs, features);
653 }
654 
656 {
658  m_custom_kernel=NULL;
659 
660  /* restore original kernel, possibly delete created one */
661  if (m_kernel_backup)
662  {
663  /* check if kernel was created in train_locked */
664  if (kernel!=m_kernel_backup)
665  SG_UNREF(kernel);
666 
668  m_kernel_backup=NULL;
669  }
670 
671  /* dont forget to call superclass method */
673 }
674 
/* Set all members to their defaults and register them with the parameter
 * framework (serialization / model selection).
 * NOTE(review): this listing is a doxygen extract; original source lines
 * 681, 696, 700, 706 and 710 were dropped by the extraction, so the code
 * below is incomplete as shown — compare against the repository file. */
675 void CKernelMachine::init()
676 {
677  m_bias=0.0;
678  kernel=NULL;
679  m_custom_kernel=NULL;
680  m_kernel_backup=NULL;
 /* NOTE(review): extract dropped line 681 here — presumably the
  * use_batch_computation default; confirm against the original source. */
682  use_linadd=true;
683  use_bias=true;
684 
 /* register members for serialization; only "kernel" is exposed to model
  * selection (MS_AVAILABLE), the rest are MS_NOT_AVAILABLE */
685  SG_ADD((CSGObject**) &kernel, "kernel", "", MS_AVAILABLE);
686  SG_ADD((CSGObject**) &m_custom_kernel, "custom_kernel", "Custom kernel for"
687  " data lock", MS_NOT_AVAILABLE);
688  SG_ADD((CSGObject**) &m_kernel_backup, "kernel_backup",
689  "Kernel backup for data lock", MS_NOT_AVAILABLE);
690  SG_ADD(&use_batch_computation, "use_batch_computation",
691  "Batch computation is enabled.", MS_NOT_AVAILABLE);
692  SG_ADD(&use_linadd, "use_linadd", "Linadd is enabled.", MS_NOT_AVAILABLE);
693  SG_ADD(&use_bias, "use_bias", "Bias shall be used.", MS_NOT_AVAILABLE);
694  SG_ADD(&m_bias, "m_bias", "Bias term.", MS_NOT_AVAILABLE);
 /* NOTE(review): extract dropped line 696 (the trailing argument of this
  * SG_ADD call, presumably MS_NOT_AVAILABLE). */
695  SG_ADD(&m_alpha, "m_alpha", "Array of coefficients alpha.",
697  SG_ADD(&m_svs, "m_svs", "Number of ``support vectors''.", MS_NOT_AVAILABLE);
698 
699  /* new parameter from param version 0 to 1 */
 /* NOTE(review): extract dropped line 700 — the call head these arguments
  * belong to (parameter-version mapping registration). */
701  new SGParamInfo("custom_kernel", CT_SCALAR, ST_NONE, PT_SGOBJECT, 1),
702  new SGParamInfo()
703  );
704 
705  /* new parameter from param version 0 to 1 */
 /* NOTE(review): extract dropped line 706 — the call head these arguments
  * belong to (parameter-version mapping registration). */
707  new SGParamInfo("kernel_backup", CT_SCALAR, ST_NONE, PT_SGOBJECT, 1),
708  new SGParamInfo()
709  );
 /* NOTE(review): extract dropped line 710 before the closing brace. */
711 }
712 
714 {
715  return true;
716 }
717 

SHOGUN Machine Learning Toolbox - Documentation