SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Kernel.h
Go to the documentation of this file.
1 /*
2  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
3  * COPYRIGHT (C) 1999 UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
4  *
5  * this program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Written (W) 1999-2009 Soeren Sonnenburg
11  * Written (W) 1999-2008 Gunnar Raetsch
12  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
13  */
14 
15 #ifndef _KERNEL_H___
16 #define _KERNEL_H___
17 
18 #include <shogun/lib/config.h>
19 
20 #include <shogun/lib/common.h>
21 #include <shogun/lib/Signal.h>
22 #include <shogun/io/SGIO.h>
23 #include <shogun/io/File.h>
26 #include <shogun/base/SGObject.h>
27 #include <shogun/lib/SGMatrix.h>
30 
31 namespace shogun
32 {
33  class CFile;
34  class CFeatures;
36 
37 #ifdef USE_SHORTREAL_KERNELCACHE
38 
40 #else
41 
43 #endif
44 
46 typedef int64_t KERNELCACHE_IDX;
47 
48 
51 {
54 };
55 
58 {
59  K_UNKNOWN = 0,
60  K_LINEAR = 10,
61  K_POLY = 20,
62  K_GAUSSIAN = 30,
66  K_SALZBERG = 41,
74  K_POLYMATCH = 100,
75  K_ALIGNMENT = 110,
80  K_COMBINED = 140,
81  K_AUC = 150,
82  K_CUSTOM = 160,
83  K_SIGMOID = 170,
84  K_CHI2 = 180,
85  K_DIAG = 190,
86  K_CONST = 200,
87  K_DISTANCE = 220,
90  K_OLIGO = 250,
91  K_MATCHWORD = 260,
92  K_TPPK = 270,
96  K_WAVELET = 310,
97  K_WAVE = 320,
98  K_CAUCHY = 330,
99  K_TSTUDENT = 340,
103  K_SPHERICAL = 380,
104  K_SPLINE = 390,
105  K_ANOVA = 400,
106  K_POWER = 410,
107  K_LOG = 420,
108  K_CIRCULAR = 430,
111  K_BESSEL = 460,
113  K_DIRECTOR = 480,
114  K_PRODUCT = 490,
115  K_LINEARARD = 500,
118  K_STREAMING = 520,
120 };
121 
124 {
125  KP_NONE = 0,
126  KP_LINADD = 1, // Kernels that can be optimized via doing normal updates w + dw
127  KP_KERNCOMBINATION = 2, // Kernels that are infact a linear combination of subkernels K=\sum_i b_i*K_i
128  KP_BATCHEVALUATION = 4 // Kernels that can on the fly generate normals in linadd and more quickly/memory efficient process batches instead of single examples
129 };
130 
131 class CSVM;
132 
158 class CKernel : public CSGObject
159 {
170  friend class CDiceKernelNormalizer;
172 
173  friend class CStreamingKernel;
174 
175  public:
176 
180  CKernel();
181 
182 
187  CKernel(int32_t size);
188 
195  CKernel(CFeatures* l, CFeatures* r, int32_t size);
196 
197  virtual ~CKernel();
198 
206  inline float64_t kernel(int32_t idx_a, int32_t idx_b)
207  {
208  REQUIRE(idx_a>=0 && idx_b>=0 && idx_a<num_lhs && idx_b<num_rhs,
209  "%s::kernel(): index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
210  get_name(), idx_a,num_lhs, idx_b,num_rhs);
211 
212  return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
213  }
214 
220  {
221  return get_kernel_matrix<float64_t>();
222  }
223 
231  preallocated=SGVector<float64_t>())
232  {
233  REQUIRE(lhs, "CKernel::get_kernel_diagonal(): Left-handside "
234  "features missing!\n");
235 
236  REQUIRE(rhs, "CKernel::get_kernel_diagonal(): Right-handside "
237  "features missing!\n");
238 
239  int32_t length=CMath::min(lhs->get_num_vectors(),rhs->get_num_vectors());
240 
241  /* allocate space if necessary */
242  if (!preallocated.vector)
243  preallocated=SGVector<float64_t>(length);
244  else
245  {
246  REQUIRE(preallocated.vlen==length,
247  "%s::get_kernel_diagonal(): Preallocated vector has"
248  " wrong size!\n", get_name());
249  }
250 
251  for (index_t i=0; i<preallocated.vlen; ++i)
252  preallocated[i]=kernel(i, i);
253 
254  return preallocated;
255  }
256 
263  {
264 
266 
267  for (int32_t i=0; i!=num_rhs; i++)
268  col[i] = kernel(i,j);
269 
270  return col;
271  }
272 
273 
280  {
282 
283  for (int32_t j=0; j!=num_lhs; j++)
284  row[j] = kernel(i,j);
285 
286  return row;
287  }
288 
312  virtual float64_t sum_symmetric_block(index_t block_begin,
313  index_t block_size, bool no_diag=true);
314 
343  virtual float64_t sum_block(index_t block_begin_row,
344  index_t block_begin_col, index_t block_size_row,
345  index_t block_size_col, bool no_diag=false);
346 
371  block_begin, index_t block_size, bool no_diag=true);
372 
403  index_t block_begin, index_t block_size, bool no_diag=true);
404 
441  index_t block_begin_row, index_t block_begin_col,
442  index_t block_size_row, index_t block_size_col,
443  bool no_diag=false);
444 
449  template <class T> SGMatrix<T> get_kernel_matrix();
450 
461  virtual bool init(CFeatures* lhs, CFeatures* rhs);
462 
468 
474 
478  virtual bool init_normalizer();
479 
486  virtual void cleanup();
487 
492  void load(CFile* loader);
493 
498  void save(CFile* writer);
499 
504  inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
505 
510  inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
511 
516  virtual int32_t get_num_vec_lhs()
517  {
518  return num_lhs;
519  }
520 
525  virtual int32_t get_num_vec_rhs()
526  {
527  return num_rhs;
528  }
529 
534  virtual bool has_features()
535  {
536  return lhs && rhs;
537  }
538 
543  inline bool get_lhs_equals_rhs()
544  {
545  return lhs_equals_rhs;
546  }
547 
549  virtual void remove_lhs_and_rhs();
550 
552  virtual void remove_lhs();
553 
555  virtual void remove_rhs();
556 
564  virtual EKernelType get_kernel_type()=0 ;
565 
572  virtual EFeatureType get_feature_type()=0;
573 
580  virtual EFeatureClass get_feature_class()=0;
581 
586  inline void set_cache_size(int32_t size)
587  {
588  cache_size = size;
589 #ifdef USE_SVMLIGHT
590  cache_reset();
591 #endif //USE_SVMLIGHT
592  }
593 
598  inline int32_t get_cache_size() { return cache_size; }
599 
600 #ifdef USE_SVMLIGHT
601 
603 
608  inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
609 
614  inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
615 
623  void get_kernel_row(
624  int32_t docnum, int32_t *active2dnum, float64_t *buffer,
625  bool full_line=false);
626 
631  void cache_kernel_row(int32_t x);
632 
638  void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
639 
641  void kernel_cache_reset_lru();
642 
649  void kernel_cache_shrink(
650  int32_t totdoc, int32_t num_shrink, int32_t *after);
651 
658  bool regression_hack=false);
659 
664  inline void set_time(int32_t t)
665  {
666  kernel_cache.time=t;
667  }
668 
674  inline int32_t kernel_cache_touch(int32_t cacheidx)
675  {
676  if(kernel_cache.index[cacheidx] != -1)
677  {
678  kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
679  return(1);
680  }
681  return(0);
682  }
683 
689  inline int32_t kernel_cache_check(int32_t cacheidx)
690  {
691  return(kernel_cache.index[cacheidx] >= 0);
692  }
693 
699  {
700  return(kernel_cache.elems < kernel_cache.max_elems);
701  }
702 
708  void kernel_cache_init(int32_t size, bool regression_hack=false);
709 
711  void kernel_cache_cleanup();
712 
713 #endif //USE_SVMLIGHT
714 
716  void list_kernel();
717 
723  inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
724 
728  virtual void clear_normal();
729 
735  virtual void add_to_normal(int32_t vector_idx, float64_t weight);
736 
742 
748 
754 
762  virtual bool init_optimization(
763  int32_t count, int32_t *IDX, float64_t *weights);
764 
769  virtual bool delete_optimization();
770 
776  bool init_optimization_svm(CSVM * svm) ;
777 
783  virtual float64_t compute_optimized(int32_t vector_idx);
784 
793  virtual void compute_batch(
794  int32_t num_vec, int32_t* vec_idx, float64_t* target,
795  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
796  float64_t factor=1.0);
797 
803 
809 
814  virtual int32_t get_num_subkernels();
815 
821  virtual void compute_by_subkernel(
822  int32_t vector_idx, float64_t * subkernel_contrib);
823 
829  virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
830 
836 
841  virtual void set_subkernel_weights(SGVector<float64_t> weights);
842 
851  const TParameter* param, index_t index=-1)
852  {
853  SG_ERROR("Can't compute derivative wrt %s parameter\n", param->m_name)
854  return SGMatrix<float64_t>();
855  }
856 
865  const TParameter* param, index_t index=-1)
866  {
867  return get_parameter_gradient(param,index).get_diagonal_vector();
868  }
869 
876  protected:
882  {
883  properties |= p;
884  }
885 
891  {
892  properties &= (properties | p) ^ p;
893  }
894 
899  inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
900 
911  virtual float64_t compute(int32_t x, int32_t y)=0;
912 
919  int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
920  {
921  int32_t i_start;
922 
923  if (symmetric)
924  i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
925  else
926  i_start=(int32_t) (offs/int64_t(n));
927 
928  return i_start;
929  }
930 
935  template <class T> static void* get_kernel_matrix_helper(void* p);
936 
945  virtual void load_serializable_post() throw (ShogunException);
946 
955  virtual void save_serializable_pre() throw (ShogunException);
956 
965  virtual void save_serializable_post() throw (ShogunException);
966 
971  virtual void register_params();
972 
973  private:
976  void init();
977 
978 
979 #ifdef USE_SVMLIGHT
980 #ifndef DOXYGEN_SHOULD_SKIP_THIS
981 
/** Bookkeeping state of the kernel cache used when USE_SVMLIGHT is defined.
 *
 * Only the fields read or written by the inline accessors of CKernel are
 * described as facts below; the remaining ones carry review notes.
 */
982  struct KERNEL_CACHE {
/* per-entry lookup table: -1 / negative means "not cached" (see
 * kernel_cache_touch / kernel_cache_check), otherwise the value is used
 * as a position in lru */
984  int32_t *index;
/* NOTE(review): presumably the inverse mapping of index -- confirm */
986  int32_t *invindex;
/* NOTE(review): presumably maps active indices to document indices -- confirm */
988  int32_t *active2totdoc;
/* NOTE(review): presumably maps document indices to active indices -- confirm */
990  int32_t *totdoc2active;
/* last-use stamps; kernel_cache_touch writes the current time here */
992  int32_t *lru;
/* NOTE(review): presumably an occupancy marker per cache slot -- confirm */
994  int32_t *occu;
/* element counter that is compared against max_elems */
996  int32_t elems;
/* upper bound for elems, exposed via get_max_elems_cache() */
998  int32_t max_elems;
/* current time stamp, set via set_time() and copied into lru on touch */
1000  int32_t time;
/* exposed via get_activenum_cache() */
1002  int32_t activenum;
1003 
/* NOTE(review): presumably the storage holding the cached kernel values -- confirm */
1005  KERNELCACHE_ELEM *buffer;
/* NOTE(review): presumably the size of buffer -- confirm units */
1007  KERNELCACHE_IDX buffsize;
1008  };
1009 
/** Parameter bundle for a kernel-cache worker.
 *
 * NOTE(review): presumably this is what cache_multiple_kernel_row_helper()
 * receives through its void* argument -- the uses are not visible in this
 * header, so the field notes below are assumptions to be confirmed.
 */
1011  struct S_KTHREAD_PARAM
1012  {
/* kernel object the worker operates on */
1014  CKernel* kernel;
/* cache bookkeeping structure the worker operates on */
1016  KERNEL_CACHE* kernel_cache;
/* NOTE(review): presumably one target buffer pointer per row -- confirm */
1018  KERNELCACHE_ELEM** cache;
/* NOTE(review): presumably the row indices that still need caching -- confirm */
1020  int32_t* uncached_rows;
/* NOTE(review): presumably the length of uncached_rows -- confirm */
1022  int32_t num_uncached;
/* NOTE(review): presumably a per-entry flag array -- confirm */
1024  uint8_t* needs_computation;
/* NOTE(review): presumably first index of this worker's range -- confirm */
1026  int32_t start;
/* NOTE(review): presumably end of this worker's range; confirm whether exclusive */
1028  int32_t end;
/* NOTE(review): presumably the total number of vectors -- confirm */
1030  int32_t num_vectors;
1031  };
1032 #endif // DOXYGEN_SHOULD_SKIP_THIS
1033 
1035  static void* cache_multiple_kernel_row_helper(void* p);
1036 
1038  void kernel_cache_free(int32_t cacheidx);
1039  int32_t kernel_cache_malloc();
1040  int32_t kernel_cache_free_lru();
1041  KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
1042 #endif //USE_SVMLIGHT
1043 
1044 
1045  protected:
1047  int32_t cache_size;
1048 
1049 #ifdef USE_SVMLIGHT
1050 
1051  KERNEL_CACHE kernel_cache;
1052 #endif //USE_SVMLIGHT
1053 
1057 
1062 
1065 
1067  int32_t num_lhs;
1069  int32_t num_rhs;
1070 
1073 
1080 
1082  uint64_t properties;
1083 
1087 };
1088 
1089 }
1090 #endif /* _KERNEL_H__ */

SHOGUN Machine Learning Toolbox - Documentation