SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
Kernel.cpp
Go to the documentation of this file.
1 /*
2  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
3  * COPYRIGHT (C) 1999 UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
4  *
5  * this program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Written (W) 1999-2009 Soeren Sonnenburg
11  * Written (W) 1999-2008 Gunnar Raetsch
12  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
13  */
14 
15 #include <shogun/lib/config.h>
16 #include <shogun/lib/common.h>
17 #include <shogun/io/SGIO.h>
18 #include <shogun/io/File.h>
19 #include <shogun/lib/Time.h>
20 #include <shogun/lib/Signal.h>
21 
22 #include <shogun/base/Parallel.h>
23 
24 #include <shogun/kernel/Kernel.h>
27 #include <shogun/base/Parameter.h>
28 
30 
31 #include <string.h>
32 #ifndef _WIN32
33 #include <unistd.h>
34 #endif
36 
37 using namespace shogun;
38 
40 {
41  init();
43 }
44 
/** Constructor with an explicit kernel cache size.
 *
 * @param size requested kernel cache size in MB; values below 10 are
 *             clamped up to 10
 *
 * NOTE(review): the rendered listing omits original line 53 here, so one
 * trailing statement of the body may be missing from this view — verify
 * against the repository.
 */
CKernel::CKernel(int32_t size) : CSGObject()
{
	init();

	// enforce a minimum cache size of 10 MB
	if (size<10)
		size=10;

	cache_size=size;
}
55 
56 
/** Constructor that additionally binds left/right-hand-side features.
 *
 * @param p_lhs features for the left hand side
 * @param p_rhs features for the right hand side
 * @param size  requested kernel cache size in MB; values below 10 are
 *              clamped up to 10
 *
 * NOTE(review): the rendered listing omits original lines 66 and 68, so
 * statements may be missing from this view of the body.
 */
CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
{
	init();

	// enforce a minimum cache size of 10 MB
	if (size<10)
		size=10;

	cache_size=size;

	// attach the supplied feature objects to this kernel
	init(p_lhs, p_rhs);
}
70 
72 {
73  if (get_is_initialized())
74  SG_ERROR("Kernel still initialized on destruction.\n")
75 
78 
79  SG_INFO("Kernel deleted (%p).\n", this)
80 }
81 
82 #ifdef USE_SVMLIGHT
/** Resizes the SVMlight-style kernel cache.
 *
 * @param size new cache size in MB; values below 10 are clamped up to 10
 * @param regression_hack forwarded to kernel_cache_init(); there it doubles
 *        the row count (regression doubles the training data)
 *
 * NOTE(review): the rendered listing omits original line 88 here, so a
 * statement (likely a cache cleanup call) may be missing from this view.
 */
void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack)
{
	// enforce a minimum cache size of 10 MB
	if (size<10)
		size=10;

	cache_size=size;

	// only (re)build the cache if there is data to cache
	if (has_features() && get_num_vec_lhs())
		kernel_cache_init(cache_size, regression_hack);
}
94 #endif //USE_SVMLIGHT
95 
96 bool CKernel::init(CFeatures* l, CFeatures* r)
97 {
98  /* make sure that features are not deleted if same ones are used */
99  SG_REF(l);
100  SG_REF(r);
101 
102  //make sure features were indeed supplied
103  REQUIRE(l, "CKernel::init(%p, %p): Left hand side features required!\n", l, r)
104  REQUIRE(r, "CKernel::init(%p, %p): Right hand side features required!\n", l, r)
105 
106  //make sure features are compatible
107  if (l->support_compatible_class())
108  {
110  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
111  l->get_name(), r->get_name());
112  }
113  else
114  {
116  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
117  l->get_name(), r->get_name())
118  }
120 
121  //remove references to previous features
123 
124  //increase reference counts
125  SG_REF(l);
126  if (l==r)
127  lhs_equals_rhs=true;
128  else // l!=r
129  SG_REF(r);
130 
131  lhs=l;
132  rhs=r;
133 
136 
139 
140  /* unref "safety" refs from beginning */
141  SG_UNREF(r);
142  SG_UNREF(l);
143 
144  SG_DEBUG("leaving CKernel::init(%p, %p)\n", l, r)
145  return true;
146 }
147 
149 {
150  SG_REF(n);
151  if (lhs && rhs)
152  n->init(this);
153 
155  normalizer=n;
156 
157  return (normalizer!=NULL);
158 }
159 
161 {
163  return normalizer;
164 }
165 
167 {
168  return normalizer->init(this);
169 }
170 
172 {
174 }
175 
176 #ifdef USE_SVMLIGHT
177 /****************************** Cache handling *******************************/
178 
/** Initializes the SVMlight kernel cache (cache code (W) Thorsten Joachims).
 *
 * Allocates the index/LRU bookkeeping arrays and the value buffer, sized so
 * the buffer never exceeds totdoc*totdoc elements.
 *
 * @param buffsize requested cache size in MB
 * @param regression_hack if true, the number of rows is doubled (regression
 *        doubles the training data, see comment below)
 */
void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack)
{
	int32_t totdoc=get_num_vec_lhs();
	if (totdoc<=0)
	{
		// NOTE(review): the argument list of this SG_ERROR call (original
		// line 185) is omitted by the rendered listing; as shown the call
		// is truncated — verify against the repository.
		SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n",
	}
	uint64_t buffer_size=0;
	int32_t i;

	//in regression the additional constraints are made by doubling the training data
	if (regression_hack)
		totdoc*=2;

	// convert MB to number of cache elements, capped at a full matrix
	buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM);
	if (buffer_size>((uint64_t) totdoc)*totdoc)
		buffer_size=((uint64_t) totdoc)*totdoc;

	SG_INFO("using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name())

	//make sure it fits in the *signed* KERNELCACHE_IDX type
	ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1)))

	// bookkeeping arrays: row -> cache line, occupancy, LRU stamps, and the
	// active<->totdoc index mappings used after shrinking
	kernel_cache.index = SG_MALLOC(int32_t, totdoc);
	kernel_cache.occu = SG_MALLOC(int32_t, totdoc);
	kernel_cache.lru = SG_MALLOC(int32_t, totdoc);
	kernel_cache.invindex = SG_MALLOC(int32_t, totdoc);
	kernel_cache.active2totdoc = SG_MALLOC(int32_t, totdoc);
	kernel_cache.totdoc2active = SG_MALLOC(int32_t, totdoc);
	kernel_cache.buffer = SG_MALLOC(KERNELCACHE_ELEM, buffer_size);
	kernel_cache.buffsize=buffer_size;
	// number of full rows that fit into the buffer
	kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc);

	if(kernel_cache.max_elems>totdoc) {
		kernel_cache.max_elems=totdoc;
	}

	kernel_cache.elems=0; // initialize cache
	for(i=0;i<totdoc;i++) {
		kernel_cache.index[i]=-1;
		kernel_cache.lru[i]=0;
	}
	for(i=0;i<totdoc;i++) {
		kernel_cache.occu[i]=0;
		kernel_cache.invindex[i]=-1;
	}

	// initially every row is active and maps to itself
	kernel_cache.activenum=totdoc;;
	for(i=0;i<totdoc;i++) {
		kernel_cache.active2totdoc[i]=i;
		kernel_cache.totdoc2active[i]=i;
	}

	kernel_cache.time=0;
}
235 
237  int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line)
238 {
239  int32_t i,j;
240  KERNELCACHE_IDX start;
241 
242  int32_t num_vectors = get_num_vec_lhs();
243  if (docnum>=num_vectors)
244  docnum=2*num_vectors-1-docnum;
245 
246  /* is cached? */
247  if(kernel_cache.index[docnum] != -1)
248  {
249  kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time; /* lru */
250  start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum];
251 
252  if (full_line)
253  {
254  for(j=0;j<get_num_vec_lhs();j++)
255  {
256  if(kernel_cache.totdoc2active[j] >= 0)
257  buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
258  else
259  buffer[j]=(float64_t) kernel(docnum, j);
260  }
261  }
262  else
263  {
264  for(i=0;(j=active2dnum[i])>=0;i++)
265  {
266  if(kernel_cache.totdoc2active[j] >= 0)
267  buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
268  else
269  {
270  int32_t k=j;
271  if (k>=num_vectors)
272  k=2*num_vectors-1-k;
273  buffer[j]=(float64_t) kernel(docnum, k);
274  }
275  }
276  }
277  }
278  else
279  {
280  if (full_line)
281  {
282  for(j=0;j<get_num_vec_lhs();j++)
283  buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j);
284  }
285  else
286  {
287  for(i=0;(j=active2dnum[i])>=0;i++)
288  {
289  int32_t k=j;
290  if (k>=num_vectors)
291  k=2*num_vectors-1-k;
292  buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k);
293  }
294  }
295  }
296 }
297 
298 
299 // Fills cache for the row m
301 {
302  register int32_t j,k,l;
303  register KERNELCACHE_ELEM *cache;
304 
305  int32_t num_vectors = get_num_vec_lhs();
306 
307  if (m>=num_vectors)
308  m=2*num_vectors-1-m;
309 
310  if(!kernel_cache_check(m)) // not cached yet
311  {
312  cache = kernel_cache_clean_and_malloc(m);
313  if(cache) {
314  l=kernel_cache.totdoc2active[m];
315 
316  for(j=0;j<kernel_cache.activenum;j++) // fill cache
317  {
318  k=kernel_cache.active2totdoc[j];
319 
320  if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) {
321  cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)
322  *kernel_cache.index[k]+l];
323  }
324  else
325  {
326  if (k>=num_vectors)
327  k=2*num_vectors-1-k;
328 
329  cache[j]=kernel(m, k);
330  }
331  }
332  }
333  else
334  perror("Error: Kernel cache full! => increase cache size");
335  }
336 }
337 
338 
339 void* CKernel::cache_multiple_kernel_row_helper(void* p)
340 {
341  int32_t j,k,l;
342  S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p;
343 
344  for (int32_t i=params->start; i<params->end; i++)
345  {
346  KERNELCACHE_ELEM* cache=params->cache[i];
347  int32_t m = params->uncached_rows[i];
348  l=params->kernel_cache->totdoc2active[m];
349 
350  for(j=0;j<params->kernel_cache->activenum;j++) // fill cache
351  {
352  k=params->kernel_cache->active2totdoc[j];
353 
354  if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) {
355  cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum)
356  *params->kernel_cache->index[k]+l];
357  }
358  else
359  {
360  if (k>=params->num_vectors)
361  k=2*params->num_vectors-1-k;
362 
363  cache[j]=params->kernel->kernel(m, k);
364  }
365  }
366 
367  //now line m is cached
368  params->needs_computation[m]=0;
369  }
370  return NULL;
371 }
372 
373 // Fills cache for the rows in key
374 void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows)
375 {
376  int32_t nthreads=parallel->get_num_threads();
377 
378  if (nthreads<2)
379  {
380  for(int32_t i=0;i<num_rows;i++)
381  cache_kernel_row(rows[i]);
382  }
383  else
384  {
385  // fill up kernel cache
386  int32_t* uncached_rows = SG_MALLOC(int32_t, num_rows);
387  KERNELCACHE_ELEM** cache = SG_MALLOC(KERNELCACHE_ELEM*, num_rows);
388  S_KTHREAD_PARAM params;
389  int32_t num_threads=nthreads-1;
390  int32_t num_vec=get_num_vec_lhs();
391  ASSERT(num_vec>0)
392  uint8_t* needs_computation=SG_CALLOC(uint8_t, num_vec);
393 
394  int32_t step=0;
395  int32_t num=0;
396  int32_t end=0;
397 
398  // allocate cachelines if necessary
399  for (int32_t i=0; i<num_rows; i++)
400  {
401  int32_t idx=rows[i];
402  if (idx>=num_vec)
403  idx=2*num_vec-1-idx;
404 
405  if (kernel_cache_check(idx))
406  continue;
407 
408  needs_computation[idx]=1;
409  uncached_rows[num]=idx;
410  cache[num]= kernel_cache_clean_and_malloc(idx);
411 
412  if (!cache[num])
413  SG_ERROR("Kernel cache full! => increase cache size\n")
414 
415  num++;
416  }
417 
418  if (num>0)
419  {
420  step = num/nthreads;
421 
422  if (step<1)
423  {
424  num_threads=num-1;
425  step=1;
426  }
427 
428  #pragma omp parallel for private(params)
429  for (int32_t t=0; t<num_threads; t++)
430  {
431  params.kernel = this;
432  params.kernel_cache = &kernel_cache;
433  params.cache = cache;
434  params.uncached_rows = uncached_rows;
435  params.needs_computation = needs_computation;
436  params.num_uncached = num;
437  params.start = t*step;
438  params.end = (t+1)*step;
439  params.num_vectors = get_num_vec_lhs();
440  end=params.end;
441 
442  cache_multiple_kernel_row_helper(&params);
443  }
444  }
445  else
446  num_threads=-1;
447 
448 
449  S_KTHREAD_PARAM last_param;
450  last_param.kernel = this;
451  last_param.kernel_cache = &kernel_cache;
452  last_param.cache = cache;
453  last_param.uncached_rows = uncached_rows;
454  last_param.needs_computation = needs_computation;
455  last_param.start = end;
456  last_param.num_uncached = num;
457  last_param.end = num;
458  last_param.num_vectors = get_num_vec_lhs();
459 
460  cache_multiple_kernel_row_helper(&last_param);
461 
462  SG_FREE(needs_computation);
463  SG_FREE(cache);
464  SG_FREE(uncached_rows);
465  }
466 }
467 
468 // remove numshrink columns in the cache
469 // which correspond to examples marked
471  int32_t totdoc, int32_t numshrink, int32_t *after)
472 {
473  ASSERT(totdoc > 0);
474  register int32_t i,j,jj,scount; // 0 in after.
475  KERNELCACHE_IDX from=0,to=0;
476  int32_t *keep;
477 
478  keep=SG_MALLOC(int32_t, totdoc);
479  for(j=0;j<totdoc;j++) {
480  keep[j]=1;
481  }
482  scount=0;
483  for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) {
484  j=kernel_cache.active2totdoc[jj];
485  if(!after[j]) {
486  scount++;
487  keep[j]=0;
488  }
489  }
490 
491  for(i=0;i<kernel_cache.max_elems;i++) {
492  for(jj=0;jj<kernel_cache.activenum;jj++) {
493  j=kernel_cache.active2totdoc[jj];
494  if(!keep[j]) {
495  from++;
496  }
497  else {
498  kernel_cache.buffer[to]=kernel_cache.buffer[from];
499  to++;
500  from++;
501  }
502  }
503  }
504 
505  kernel_cache.activenum=0;
506  for(j=0;j<totdoc;j++) {
507  if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) {
508  kernel_cache.active2totdoc[kernel_cache.activenum]=j;
509  kernel_cache.totdoc2active[j]=kernel_cache.activenum;
510  kernel_cache.activenum++;
511  }
512  else {
513  kernel_cache.totdoc2active[j]=-1;
514  }
515  }
516 
517  kernel_cache.max_elems= (int32_t) kernel_cache.buffsize;
518 
519  if (kernel_cache.activenum>0)
520  kernel_cache.buffsize/=kernel_cache.activenum;
521 
522  if(kernel_cache.max_elems>totdoc)
523  kernel_cache.max_elems=totdoc;
524 
525  SG_FREE(keep);
526 
527 }
528 
530 {
531  int32_t maxlru=0,k;
532 
533  for(k=0;k<kernel_cache.max_elems;k++) {
534  if(maxlru < kernel_cache.lru[k])
535  maxlru=kernel_cache.lru[k];
536  }
537  for(k=0;k<kernel_cache.max_elems;k++) {
538  kernel_cache.lru[k]-=maxlru;
539  }
540 }
541 
543 {
544  SG_FREE(kernel_cache.index);
545  SG_FREE(kernel_cache.occu);
546  SG_FREE(kernel_cache.lru);
547  SG_FREE(kernel_cache.invindex);
548  SG_FREE(kernel_cache.active2totdoc);
549  SG_FREE(kernel_cache.totdoc2active);
550  SG_FREE(kernel_cache.buffer);
551  memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
552 }
553 
554 int32_t CKernel::kernel_cache_malloc()
555 {
556  int32_t i;
557 
559  for(i=0;i<kernel_cache.max_elems;i++) {
560  if(!kernel_cache.occu[i]) {
561  kernel_cache.occu[i]=1;
562  kernel_cache.elems++;
563  return(i);
564  }
565  }
566  }
567  return(-1);
568 }
569 
570 void CKernel::kernel_cache_free(int32_t cacheidx)
571 {
572  kernel_cache.occu[cacheidx]=0;
573  kernel_cache.elems--;
574 }
575 
576 // remove least recently used cache
577 // element
578 int32_t CKernel::kernel_cache_free_lru()
579 {
580  register int32_t k,least_elem=-1,least_time;
581 
582  least_time=kernel_cache.time+1;
583  for(k=0;k<kernel_cache.max_elems;k++) {
584  if(kernel_cache.invindex[k] != -1) {
585  if(kernel_cache.lru[k]<least_time) {
586  least_time=kernel_cache.lru[k];
587  least_elem=k;
588  }
589  }
590  }
591 
592  if(least_elem != -1) {
593  kernel_cache_free(least_elem);
594  kernel_cache.index[kernel_cache.invindex[least_elem]]=-1;
595  kernel_cache.invindex[least_elem]=-1;
596  return(1);
597  }
598  return(0);
599 }
600 
601 // Get a free cache entry. In case cache is full, the lru
602 // element is removed.
603 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx)
604 {
605  int32_t result;
606  if((result = kernel_cache_malloc()) == -1) {
607  if(kernel_cache_free_lru()) {
608  result = kernel_cache_malloc();
609  }
610  }
611  kernel_cache.index[cacheidx]=result;
612  if(result == -1) {
613  return(0);
614  }
615  kernel_cache.invindex[result]=cacheidx;
616  kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time; // lru
617  return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]];
618 }
619 #endif //USE_SVMLIGHT
620 
621 void CKernel::load(CFile* loader)
622 {
625 }
626 
627 void CKernel::save(CFile* writer)
628 {
629  SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>();
631  writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols);
633 }
634 
636 {
637  SG_DEBUG("entering CKernel::remove_lhs_and_rhs\n")
638  if (rhs!=lhs)
639  SG_UNREF(rhs);
640  rhs = NULL;
641  num_rhs=0;
642 
643  SG_UNREF(lhs);
644  lhs = NULL;
645  num_lhs=0;
646  lhs_equals_rhs=false;
647 
648 #ifdef USE_SVMLIGHT
649  cache_reset();
650 #endif //USE_SVMLIGHT
651  SG_DEBUG("leaving CKernel::remove_lhs_and_rhs\n")
652 }
653 
655 {
656  if (rhs==lhs)
657  rhs=NULL;
658  SG_UNREF(lhs);
659  lhs = NULL;
660  num_lhs=0;
661  lhs_equals_rhs=false;
662 #ifdef USE_SVMLIGHT
663  cache_reset();
664 #endif //USE_SVMLIGHT
665 }
666 
669 {
670  if (rhs!=lhs)
671  SG_UNREF(rhs);
672  rhs = NULL;
673  num_rhs=0;
674  lhs_equals_rhs=false;
675 
676 #ifdef USE_SVMLIGHT
677  cache_reset();
678 #endif //USE_SVMLIGHT
679 }
680 
681 #define ENUM_CASE(n) case n: SG_INFO(#n " ") break;
682 
684 {
685  SG_INFO("%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
687  get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
688  "SLOWBUTMEMEFFICIENT");
689 
690  switch (get_kernel_type())
691  {
754  }
755 
756  switch (get_feature_class())
757  {
768  ENUM_CASE(C_WD)
780  }
781 
782  switch (get_feature_type())
783  {
798  }
799  SG_INFO("\n")
800 }
801 #undef ENUM_CASE
802 
804  int32_t count, int32_t *IDX, float64_t * weights)
805 {
806  SG_ERROR("kernel does not support linadd optimization\n")
807  return false ;
808 }
809 
811 {
812  SG_ERROR("kernel does not support linadd optimization\n")
813  return false;
814 }
815 
817 {
818  SG_ERROR("kernel does not support linadd optimization\n")
819  return 0;
820 }
821 
823  int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
824  int32_t* IDX, float64_t* weights, float64_t factor)
825 {
826  SG_ERROR("kernel does not support batch computation\n")
827 }
828 
/** Adds a weighted vector to the "normal" vector (linadd optimization).
 *
 * Base-class stub: kernels supporting the linadd optimization override this;
 * here it always raises an error.
 *
 * @param vector_idx index of the vector (unused in this stub)
 * @param weight weight of the vector (unused in this stub)
 */
void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
{
	SG_ERROR("kernel does not support linadd optimization, add_to_normal not implemented\n")
}
833 
835 {
836  SG_ERROR("kernel does not support linadd optimization, clear_normal not implemented\n")
837 }
838 
840 {
841  return 1;
842 }
843 
845  int32_t vector_idx, float64_t * subkernel_contrib)
846 {
847  SG_ERROR("kernel compute_by_subkernel not implemented\n")
848 }
849 
/** Returns the subkernel weights; a plain kernel has exactly one.
 *
 * @param num_weights output parameter, always set to 1 here
 * @return pointer to the combined_kernel_weight member (do not free)
 */
const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
{
	num_weights=1 ;
	return &combined_kernel_weight ;
}
855 
857 {
858  int num_weights = 1;
859  const float64_t* weight = get_subkernel_weights(num_weights);
860  return SGVector<float64_t>(const_cast<float64_t*>(weight),1,false);
861 }
862 
864 {
865  ASSERT(weights.vector)
866  if (weights.vlen!=1)
867  SG_ERROR("number of subkernel weights should be one ...\n")
868 
869  combined_kernel_weight = weights.vector[0] ;
870 }
871 
873 {
874  if (kernel)
875  {
876  CKernel* casted=dynamic_cast<CKernel*>(kernel);
877  REQUIRE(casted, "CKernel::obtain_from_generic(): Error, provided object"
878  " of class \"%s\" is not a subclass of CKernel!\n",
879  kernel->get_name());
880  return casted;
881  }
882  else
883  return NULL;
884 }
885 
887 {
888  int32_t num_suppvec=svm->get_num_support_vectors();
889  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
890  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
891 
892  for (int32_t i=0; i<num_suppvec; i++)
893  {
894  sv_idx[i] = svm->get_support_vector(i);
895  sv_weight[i] = svm->get_alpha(i);
896  }
897  bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
898 
899  SG_FREE(sv_idx);
900  SG_FREE(sv_weight);
901  return ret;
902 }
903 
905 {
907  if (lhs_equals_rhs)
908  rhs=lhs;
909 }
910 
912 {
914 
915  if (lhs_equals_rhs)
916  rhs=NULL;
917 }
918 
920 {
922 
923  if (lhs_equals_rhs)
924  rhs=lhs;
925 }
926 
928  SG_ADD(&cache_size, "cache_size",
929  "Cache size in MB.", MS_NOT_AVAILABLE);
930  SG_ADD((CSGObject**) &lhs, "lhs",
931  "Feature vectors to occur on left hand side.", MS_NOT_AVAILABLE);
932  SG_ADD((CSGObject**) &rhs, "rhs",
933  "Feature vectors to occur on right hand side.", MS_NOT_AVAILABLE);
934  SG_ADD(&lhs_equals_rhs, "lhs_equals_rhs",
935  "If features on lhs are the same as on rhs.", MS_NOT_AVAILABLE);
936  SG_ADD(&num_lhs, "num_lhs", "Number of feature vectors on left hand side.",
938  SG_ADD(&num_rhs, "num_rhs", "Number of feature vectors on right hand side.",
940  SG_ADD(&combined_kernel_weight, "combined_kernel_weight",
941  "Combined kernel weight.", MS_AVAILABLE);
942  SG_ADD(&optimization_initialized, "optimization_initialized",
943  "Optimization is initialized.", MS_NOT_AVAILABLE);
944  SG_ADD((machine_int_t*) &opt_type, "opt_type",
945  "Optimization type.", MS_NOT_AVAILABLE);
946  SG_ADD(&properties, "properties", "Kernel properties.", MS_NOT_AVAILABLE);
947  SG_ADD((CSGObject**) &normalizer, "normalizer", "Normalize the kernel.",
948  MS_AVAILABLE);
949 }
950 
951 
952 void CKernel::init()
953 {
954  cache_size=10;
955  kernel_matrix=NULL;
956  lhs=NULL;
957  rhs=NULL;
958  num_lhs=0;
959  num_rhs=0;
960  lhs_equals_rhs=false;
965  normalizer=NULL;
966 
967 #ifdef USE_SVMLIGHT
968  memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
969 #endif //USE_SVMLIGHT
970 
972 }
973 
974 namespace shogun
975 {
977 template <class T> struct K_THREAD_PARAM
978 {
982  int32_t start;
984  int32_t end;
986  int64_t total_start;
988  int64_t total_end;
990  int32_t m;
992  int32_t n;
994  T* result;
996  bool symmetric;
998  bool verbose;
999 };
1000 }
1001 
1003  bool no_diag)
1004 {
1005  SG_DEBUG("Entering\n");
1006 
1007  REQUIRE(has_features(), "No features assigned to kernel\n")
1008  REQUIRE(lhs_equals_rhs, "The kernel matrix is not symmetric!\n")
1009  REQUIRE(block_begin>=0 && block_begin<num_rhs,
1010  "Invalid block begin index (%d, %d)!\n", block_begin, block_begin)
1011  REQUIRE(block_begin+block_size<=num_rhs,
1012  "Invalid block size (%d) at starting index (%d, %d)! "
1013  "Please use smaller blocks!", block_size, block_begin, block_begin)
1014  REQUIRE(block_size>=1, "Invalid block size (%d)!\n", block_size)
1015 
1016  float64_t sum=0.0;
1017 
1018  // since the block is symmetric with main diagonal inside, we can save half
1019  // the computation with using only the upper triangular part.
1020  // this can be done in parallel
1021  #pragma omp parallel for reduction(+:sum)
1022  for (index_t i=0; i<block_size; ++i)
1023  {
1024  // compute the kernel values on the upper triangular part of the kernel
1025  // matrix and compute sum on the fly
1026  for (index_t j=i+1; j<block_size; ++j)
1027  {
1028  float64_t k=kernel(i+block_begin, j+block_begin);
1029  sum+=k;
1030  }
1031  }
1032 
1033  // the actual sum would be twice of what we computed
1034  sum*=2;
1035 
1036  // add the diagonal elements if required - keeping this check
1037  // outside of the loop to save cycles
1038  if (!no_diag)
1039  {
1040  #pragma omp parallel for reduction(+:sum)
1041  for (index_t i=0; i<block_size; ++i)
1042  {
1043  float64_t diag=kernel(i+block_begin, i+block_begin);
1044  sum+=diag;
1045  }
1046  }
1047 
1048  SG_DEBUG("Leaving\n");
1049 
1050  return sum;
1051 }
1052 
1053 float64_t CKernel::sum_block(index_t block_begin_row, index_t block_begin_col,
1054  index_t block_size_row, index_t block_size_col, bool no_diag)
1055 {
1056  SG_DEBUG("Entering\n");
1057 
1058  REQUIRE(has_features(), "No features assigned to kernel\n")
1059  REQUIRE(block_begin_row>=0 && block_begin_row<num_lhs &&
1060  block_begin_col>=0 && block_begin_col<num_rhs,
1061  "Invalid block begin index (%d, %d)!\n",
1062  block_begin_row, block_begin_col)
1063  REQUIRE(block_begin_row+block_size_row<=num_lhs &&
1064  block_begin_col+block_size_col<=num_rhs,
1065  "Invalid block size (%d, %d) at starting index (%d, %d)! "
1066  "Please use smaller blocks!", block_size_row, block_size_col,
1067  block_begin_row, block_begin_col)
1068  REQUIRE(block_size_row>=1 && block_size_col>=1,
1069  "Invalid block size (%d, %d)!\n", block_size_row, block_size_col)
1070 
1071  // check if removal of diagonal is required/valid
1072  if (no_diag && block_size_row!=block_size_col)
1073  {
1074  SG_WARNING("Not removing the main diagonal since block is not square!\n");
1075  no_diag=false;
1076  }
1077 
1078  float64_t sum=0.0;
1079 
1080  // this can be done in parallel for the rows/cols
1081  #pragma omp parallel for reduction(+:sum)
1082  for (index_t i=0; i<block_size_row; ++i)
1083  {
1084  // compute the kernel values and compute sum on the fly
1085  for (index_t j=0; j<block_size_col; ++j)
1086  {
1087  float64_t k=no_diag && i==j ? 0 :
1088  kernel(i+block_begin_row, j+block_begin_col);
1089  sum+=k;
1090  }
1091  }
1092 
1093  SG_DEBUG("Leaving\n");
1094 
1095  return sum;
1096 }
1097 
1099  index_t block_size, bool no_diag)
1100 {
1101  SG_DEBUG("Entering\n");
1102 
1103  REQUIRE(has_features(), "No features assigned to kernel\n")
1104  REQUIRE(lhs_equals_rhs, "The kernel matrix is not symmetric!\n")
1105  REQUIRE(block_begin>=0 && block_begin<num_rhs,
1106  "Invalid block begin index (%d, %d)!\n", block_begin, block_begin)
1107  REQUIRE(block_begin+block_size<=num_rhs,
1108  "Invalid block size (%d) at starting index (%d, %d)! "
1109  "Please use smaller blocks!", block_size, block_begin, block_begin)
1110  REQUIRE(block_size>=1, "Invalid block size (%d)!\n", block_size)
1111 
1112  // initialize the vector that accumulates the row/col-wise sum on the go
1113  SGVector<float64_t> row_sum(block_size);
1114  row_sum.set_const(0.0);
1115 
1116  // since the block is symmetric with main diagonal inside, we can save half
1117  // the computation with using only the upper triangular part.
1118  // this can be done in parallel for the rows/cols
1119  #pragma omp parallel for
1120  for (index_t i=0; i<block_size; ++i)
1121  {
1122  // compute the kernel values on the upper triangular part of the kernel
1123  // matrix and compute row-wise sum on the fly
1124  for (index_t j=i+1; j<block_size; ++j)
1125  {
1126  float64_t k=kernel(i+block_begin, j+block_begin);
1127  #pragma omp critical
1128  {
1129  row_sum[i]+=k;
1130  row_sum[j]+=k;
1131  }
1132  }
1133  }
1134 
1135  // add the diagonal elements if required - keeping this check
1136  // outside of the loop to save cycles
1137  if (!no_diag)
1138  {
1139  #pragma omp parallel for
1140  for (index_t i=0; i<block_size; ++i)
1141  {
1142  float64_t diag=kernel(i+block_begin, i+block_begin);
1143  row_sum[i]+=diag;
1144  }
1145  }
1146 
1147  SG_DEBUG("Leaving\n");
1148 
1149  return row_sum;
1150 }
1151 
1153  block_begin, index_t block_size, bool no_diag)
1154 {
1155  SG_DEBUG("Entering\n");
1156 
1157  REQUIRE(has_features(), "No features assigned to kernel\n")
1158  REQUIRE(lhs_equals_rhs, "The kernel matrix is not symmetric!\n")
1159  REQUIRE(block_begin>=0 && block_begin<num_rhs,
1160  "Invalid block begin index (%d, %d)!\n", block_begin, block_begin)
1161  REQUIRE(block_begin+block_size<=num_rhs,
1162  "Invalid block size (%d) at starting index (%d, %d)! "
1163  "Please use smaller blocks!", block_size, block_begin, block_begin)
1164  REQUIRE(block_size>=1, "Invalid block size (%d)!\n", block_size)
1165 
1166  // initialize the matrix that accumulates the row/col-wise sum on the go
1167  // the first column stores the sum of kernel values
1168  // the second column stores the sum of squared kernel values
1169  SGMatrix<float64_t> row_sum(block_size, 2);
1170  row_sum.set_const(0.0);
1171 
1172  // since the block is symmetric with main diagonal inside, we can save half
1173  // the computation with using only the upper triangular part
1174  // this can be done in parallel for the rows/cols
1175 #pragma omp parallel for
1176  for (index_t i=0; i<block_size; ++i)
1177  {
1178  // compute the kernel values on the upper triangular part of the kernel
1179  // matrix and compute row-wise sum and squared sum on the fly
1180  for (index_t j=i+1; j<block_size; ++j)
1181  {
1182  float64_t k=kernel(i+block_begin, j+block_begin);
1183 #pragma omp critical
1184  {
1185  row_sum(i, 0)+=k;
1186  row_sum(j, 0)+=k;
1187  row_sum(i, 1)+=k*k;
1188  row_sum(j, 1)+=k*k;
1189  }
1190  }
1191  }
1192 
1193  // add the diagonal elements if required - keeping this check
1194  // outside of the loop to save cycles
1195  if (!no_diag)
1196  {
1197 #pragma omp parallel for
1198  for (index_t i=0; i<block_size; ++i)
1199  {
1200  float64_t diag=kernel(i+block_begin, i+block_begin);
1201  row_sum(i, 0)+=diag;
1202  row_sum(i, 1)+=diag*diag;
1203  }
1204  }
1205 
1206  SG_DEBUG("Leaving\n");
1207 
1208  return row_sum;
1209 }
1210 
1212  index_t block_begin_col, index_t block_size_row,
1213  index_t block_size_col, bool no_diag)
1214 {
1215  SG_DEBUG("Entering\n");
1216 
1217  REQUIRE(has_features(), "No features assigned to kernel\n")
1218  REQUIRE(block_begin_row>=0 && block_begin_row<num_lhs &&
1219  block_begin_col>=0 && block_begin_col<num_rhs,
1220  "Invalid block begin index (%d, %d)!\n",
1221  block_begin_row, block_begin_col)
1222  REQUIRE(block_begin_row+block_size_row<=num_lhs &&
1223  block_begin_col+block_size_col<=num_rhs,
1224  "Invalid block size (%d, %d) at starting index (%d, %d)! "
1225  "Please use smaller blocks!", block_size_row, block_size_col,
1226  block_begin_row, block_begin_col)
1227  REQUIRE(block_size_row>=1 && block_size_col>=1,
1228  "Invalid block size (%d, %d)!\n", block_size_row, block_size_col)
1229 
1230  // check if removal of diagonal is required/valid
1231  if (no_diag && block_size_row!=block_size_col)
1232  {
1233  SG_WARNING("Not removing the main diagonal since block is not square!\n");
1234  no_diag=false;
1235  }
1236 
1237  // initialize the vector that accumulates the row/col-wise sum on the go
1238  // the first block_size_row entries store the row-wise sum of kernel values
1239  // the nextt block_size_col entries store the col-wise sum of kernel values
1240  SGVector<float64_t> sum(block_size_row+block_size_col);
1241  sum.set_const(0.0);
1242 
1243  // this can be done in parallel for the rows/cols
1244 #pragma omp parallel for
1245  for (index_t i=0; i<block_size_row; ++i)
1246  {
1247  // compute the kernel values and compute sum on the fly
1248  for (index_t j=0; j<block_size_col; ++j)
1249  {
1250  float64_t k=no_diag && i==j ? 0 :
1251  kernel(i+block_begin_row, j+block_begin_col);
1252 #pragma omp critical
1253  {
1254  sum[i]+=k;
1255  sum[j+block_size_row]+=k;
1256  }
1257  }
1258  }
1259 
1260  SG_DEBUG("Leaving\n");
1261 
1262  return sum;
1263 }
1264 
/** Worker that computes one horizontal stripe [start, end) of the kernel
 * matrix into params->result, stored column-major (result[i+j*m]).
 * For symmetric matrices only the upper triangle is computed and mirrored.
 *
 * @param p pointer to a K_THREAD_PARAM<T> describing the stripe
 * @return always NULL
 */
template <class T> void* CKernel::get_kernel_matrix_helper(void* p)
{
	K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
	int32_t i_start=params->start;
	int32_t i_end=params->end;
	CKernel* k=params->kernel;
	T* result=params->result;
	bool symmetric=params->symmetric;
	int32_t n=params->n;
	int32_t m=params->m;
	bool verbose=params->verbose;
	int64_t total_start=params->total_start;
	int64_t total_end=params->total_end;
	// running counter of computed entries, used for progress reporting
	int64_t total=total_start;

	for (int32_t i=i_start; i<i_end; i++)
	{
		int32_t j_start=0;

		// symmetric case: compute only the upper triangle of each row
		if (symmetric)
			j_start=i;

		for (int32_t j=j_start; j<n; j++)
		{
			float64_t v=k->kernel(i,j);
			result[i+j*m]=v;

			// mirror the value into the lower triangle
			if (symmetric && i!=j)
				result[j+i*m]=v;

			if (verbose)
			{
				total++;

				if (symmetric && i!=j)
					total++;

				if (total%100 == 0)
					SG_OBJ_PROGRESS(k, total, total_start, total_end)

				// NOTE(review): the rendered listing omits original line
				// 1305 directly above this break — presumably a
				// cancellation check guarding it; as shown, the break
				// would fire unconditionally whenever verbose is set.
				// Verify against the repository.
				break;
			}
		}

	}

	return NULL;
}
1314 
1315 template <class T>
1317 {
1318  T* result = NULL;
1319 
1320  REQUIRE(has_features(), "no features assigned to kernel\n")
1321 
1322  int32_t m=get_num_vec_lhs();
1323  int32_t n=get_num_vec_rhs();
1324 
1325  int64_t total_num = int64_t(m)*n;
1326 
1327  // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
1328  bool symmetric= (lhs && lhs==rhs && m==n);
1329 
1330  SG_DEBUG("returning kernel matrix of size %dx%d\n", m, n)
1331 
1332  result=SG_MALLOC(T, total_num);
1333 
1334  int32_t num_threads=parallel->get_num_threads();
1335  K_THREAD_PARAM<T> params;
1336  int64_t step = total_num/num_threads;
1337  index_t t = 0;
1338  #pragma omp parallel for lastprivate(t) private(params)
1339  for (t = 0; t < num_threads; ++t)
1340  {
1341  params.kernel = this;
1342  params.result = result;
1343  params.start = compute_row_start(t*step, n, symmetric);
1344  params.end = compute_row_start((t+1)*step, n, symmetric);
1345  params.total_start=t*step;
1346  params.total_end=(t+1)*step;
1347  params.n=n;
1348  params.m=m;
1349  params.symmetric=symmetric;
1350  params.verbose=false;
1351  CKernel::get_kernel_matrix_helper<T>((void*)&params);
1352  }
1353 
1354  if (total_num % num_threads != 0)
1355  {
1356  params.kernel = this;
1357  params.result = result;
1358  params.start = compute_row_start(t*step, n, symmetric);
1359  params.end = m;
1360  params.total_start=t*step;
1361  params.total_end=total_num;
1362  params.n=n;
1363  params.m=m;
1364  params.symmetric=symmetric;
1365  params.verbose=false;
1366  CKernel::get_kernel_matrix_helper<T>((void*)&params);
1367  }
1368 
1369  SG_DONE()
1370 
1371  return SGMatrix<T>(result,m,n,true);
1372 }
1373 
1374 
1375 template SGMatrix<float64_t> CKernel::get_kernel_matrix<float64_t>();
1376 template SGMatrix<float32_t> CKernel::get_kernel_matrix<float32_t>();
1377 
1378 template void* CKernel::get_kernel_matrix_helper<float64_t>(void* p);
1379 template void* CKernel::get_kernel_matrix_helper<float32_t>(void* p);
1380 
virtual void clear_normal()
Definition: Kernel.cpp:834
virtual const char * get_name() const =0
virtual void load_serializable_post()
Definition: Kernel.cpp:904
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:96
virtual bool support_compatible_class() const
Definition: Features.h:323
int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
#define SG_INFO(...)
Definition: SGIO.h:117
virtual void cleanup()
Definition: Kernel.cpp:171
#define SG_RESET_LOCALE
Definition: SGIO.h:85
#define SG_DONE()
Definition: SGIO.h:156
virtual void set_matrix(const bool *matrix, int32_t num_feat, int32_t num_vec)
Definition: File.cpp:126
virtual void compute_by_subkernel(int32_t vector_idx, float64_t *subkernel_contrib)
Definition: Kernel.cpp:844
void cache_multiple_kernel_rows(int32_t *key, int32_t varnum)
Definition: Kernel.cpp:374
virtual bool get_feature_class_compatibility(EFeatureClass rhs) const
Definition: Features.cpp:355
int32_t get_num_threads() const
Definition: Parallel.cpp:97
int32_t index_t
Definition: common.h:72
int32_t num_rhs
number of feature vectors on right hand side
static void * get_kernel_matrix_helper(void *p)
Definition: Kernel.cpp:1265
Class ShogunException defines an exception which is thrown whenever an error inside of shogun occurs...
virtual bool set_normalizer(CKernelNormalizer *normalizer)
Definition: Kernel.cpp:148
virtual float64_t sum_block(index_t block_begin_row, index_t block_begin_col, index_t block_size_row, index_t block_size_col, bool no_diag=false)
Definition: Kernel.cpp:1053
virtual int32_t get_num_vectors() const =0
virtual void save_serializable_pre()
Definition: SGObject.cpp:464
#define SG_ERROR(...)
Definition: SGIO.h:128
#define REQUIRE(x,...)
Definition: SGIO.h:205
virtual bool delete_optimization()
Definition: Kernel.cpp:810
int64_t KERNELCACHE_IDX
Definition: kernel/Kernel.h:46
int32_t kernel_cache_space_available()
index_t num_cols
Definition: SGMatrix.h:465
float64_t kernel(int32_t idx_a, int32_t idx_b)
#define ENUM_CASE(n)
Definition: Kernel.cpp:681
Parallel * parallel
Definition: SGObject.h:561
virtual void remove_rhs()
takes all necessary steps if the rhs is removed from kernel
Definition: Kernel.cpp:668
virtual int32_t get_num_vec_lhs()
SGMatrix< float64_t > get_kernel_matrix()
#define SG_REF(x)
Definition: SGObject.h:52
#define SG_SET_LOCALE_C
Definition: SGIO.h:84
int32_t cache_size
cache_size in MB
index_t num_rows
Definition: SGMatrix.h:463
void kernel_cache_shrink(int32_t totdoc, int32_t num_shrink, int32_t *after)
Definition: Kernel.cpp:470
bool get_is_initialized()
virtual SGMatrix< float64_t > row_wise_sum_squared_sum_symmetric_block(index_t block_begin, index_t block_size, bool no_diag=true)
Definition: Kernel.cpp:1152
float64_t combined_kernel_weight
virtual void register_params()
Definition: Kernel.cpp:927
void save(CFile *writer)
Definition: Kernel.cpp:627
virtual void remove_lhs_and_rhs()
Definition: Kernel.cpp:635
index_t vlen
Definition: SGVector.h:545
virtual CKernelNormalizer * get_normalizer()
Definition: Kernel.cpp:160
#define ASSERT(x)
Definition: SGIO.h:200
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:125
virtual SGVector< float64_t > row_col_wise_sum_block(index_t block_begin_row, index_t block_begin_col, index_t block_size_row, index_t block_size_col, bool no_diag=false)
Definition: Kernel.cpp:1211
void cache_kernel_row(int32_t x)
Definition: Kernel.cpp:300
#define SG_OBJ_PROGRESS(o,...)
Definition: SGIO.h:146
virtual float64_t sum_symmetric_block(index_t block_begin, index_t block_size, bool no_diag=true)
Definition: Kernel.cpp:1002
virtual SGVector< float64_t > get_subkernel_weights()
Definition: Kernel.cpp:856
double float64_t
Definition: common.h:60
KERNEL_CACHE kernel_cache
kernel cache
virtual EFeatureType get_feature_type()=0
KERNELCACHE_ELEM * kernel_matrix
A File access base class.
Definition: File.h:34
virtual void save_serializable_post()
Definition: Kernel.cpp:919
virtual float64_t compute_optimized(int32_t vector_idx)
Definition: Kernel.cpp:816
EOptimizationType get_optimization_type()
virtual void save_serializable_post()
Definition: SGObject.cpp:469
void list_kernel()
Definition: Kernel.cpp:683
float64_t get_alpha(int32_t idx)
float64_t get_combined_kernel_weight()
virtual SGVector< float64_t > row_wise_sum_symmetric_block(index_t block_begin, index_t block_size, bool no_diag=true)
Definition: Kernel.cpp:1098
virtual EFeatureClass get_feature_class() const =0
Identity Kernel Normalization, i.e. no normalization is applied.
int32_t num_lhs
number of feature vectors on left hand side
The class Kernel Normalizer defines a function to post-process kernel values.
int32_t get_support_vector(int32_t idx)
static bool cancel_computations()
Definition: Signal.h:111
virtual int32_t get_num_vec_rhs()
virtual void set_subkernel_weights(SGVector< float64_t > weights)
Definition: Kernel.cpp:863
virtual bool init_normalizer()
Definition: Kernel.cpp:166
bool optimization_initialized
EOptimizationType opt_type
void load(CFile *loader)
Definition: Kernel.cpp:621
virtual void load_serializable_post()
Definition: SGObject.cpp:459
CFeatures * rhs
feature vectors to occur on right hand side
static CKernel * obtain_from_generic(CSGObject *kernel)
Definition: Kernel.cpp:872
#define SG_UNREF(x)
Definition: SGObject.h:53
#define SG_DEBUG(...)
Definition: SGIO.h:106
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual bool init(CKernel *k)=0
virtual void compute_batch(int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0)
Definition: Kernel.cpp:822
T sum(const Container< T > &a, bool no_diag=false)
bool lhs_equals_rhs
lhs
int machine_int_t
Definition: common.h:69
virtual EKernelType get_kernel_type()=0
virtual bool init_optimization(int32_t count, int32_t *IDX, float64_t *weights)
Definition: Kernel.cpp:803
CFeatures * lhs
feature vectors to occur on left hand side
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual void save_serializable_pre()
Definition: Kernel.cpp:911
void kernel_cache_cleanup()
Definition: Kernel.cpp:542
virtual void remove_lhs()
Definition: Kernel.cpp:654
int32_t kernel_cache_check(int32_t cacheidx)
virtual int32_t get_num_subkernels()
Definition: Kernel.cpp:839
bool init_optimization_svm(CSVM *svm)
Definition: Kernel.cpp:886
A generic Support Vector Machine Interface.
Definition: SVM.h:49
void kernel_cache_reset_lru()
Definition: Kernel.cpp:529
The Kernel base class.
CKernelNormalizer * normalizer
virtual SGVector< float64_t > get_kernel_row(int32_t i)
#define SG_WARNING(...)
Definition: SGIO.h:127
#define SG_ADD(...)
Definition: SGObject.h:94
virtual bool has_features()
void kernel_cache_init(int32_t size, bool regression_hack=false)
Definition: Kernel.cpp:179
virtual ~CKernel()
Definition: Kernel.cpp:71
virtual void add_to_normal(int32_t vector_idx, float64_t weight)
Definition: Kernel.cpp:829
void set_const(T const_elem)
Definition: SGMatrix.cpp:209
float64_t KERNELCACHE_ELEM
Definition: kernel/Kernel.h:35
void set_const(T const_elem)
Definition: SGVector.cpp:184
void resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack=false)
Definition: Kernel.cpp:83
virtual EFeatureType get_feature_type() const =0
virtual EFeatureClass get_feature_class()=0

SHOGUN Machine Learning Toolbox - Documentation