SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
OligoStringKernel.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2008 Christian Igel, Tobias Glasmachers
8  * Copyright (C) 2008 Christian Igel, Tobias Glasmachers
9  *
10  * Shogun adjustments (W) 2008-2009,2013 Soeren Sonnenburg
11  * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max-Planck-Society
12  * Copyright (C) 2013 Soeren Sonnenburg
13  */
14 #ifndef _OLIGOSTRINGKERNEL_H_
15 #define _OLIGOSTRINGKERNEL_H_
16 
17 #include <shogun/lib/config.h>
18 
20 
21 #include <vector>
22 #include <string>
23 
24 namespace shogun
25 {
/** This class offers access to the Oligo Kernel introduced by Meinicke et al.
 * in 2004. It is declared as a subclass of CStringKernel<char>.
 *
 * NOTE(review): this block is a rendered source listing. The leading number
 * on each line is the line number in the original header; gaps in the
 * numbering are lines the listing does not show (documentation comments and,
 * in a few places, the first line of a declaration). Those places are marked
 * below.
 */
44 class COligoStringKernel : public CStringKernel<char>
45 {
46  public:
49 
 /** Constructor taking a cache size, k and width.
  *
  * NOTE(review): cache_size is presumably in MB, k presumably the k-mer
  * length and width the kernel width -- confirm against the implementation.
  */
55  COligoStringKernel(int32_t cache_size, int32_t k, float64_t width);
56 
 /** NOTE(review): the opening line of this declaration (original line 64) is
  * not shown in the listing; only the trailing `k` and `width` parameters are
  * visible. Presumably a second constructor overload -- confirm against the
  * original header.
  */
65  int32_t k, float64_t width);
66 
 /** Virtual destructor. */
68  virtual ~COligoStringKernel();
69 
 /** Takes two feature objects, l and r, and returns a bool.
  *
  * NOTE(review): presumably binds the kernel to its left- and right-hand
  * side features and reports success -- confirm.
  */
76  virtual bool init(CFeatures* l, CFeatures* r);
77 
 /** Returns the kernel type identifier, K_OLIGO. */
82  virtual EKernelType get_kernel_type() { return K_OLIGO; }
83 
 /** Returns the name of this kernel, "OligoStringKernel". */
88  virtual const char* get_name() const { return "OligoStringKernel"; }
89 
90 
 /** Takes two int32_t arguments, x and y, and returns a float64_t.
  *
  * NOTE(review): x and y are presumably indices of the feature vectors to
  * compare -- confirm.
  */
91  virtual float64_t compute(int32_t x, int32_t y);
92 
 /** Virtual, takes no arguments and returns nothing.
  *
  * NOTE(review): presumably releases state set up by init(l, r) -- confirm.
  */
95  virtual void cleanup();
96 
97  protected:
 /** Encodes the signals of the sequence.
  *
  * `values` is the only non-const reference parameter, so it is presumably
  * where the encoded (int32_t, float64_t) pairs are written -- confirm.
  */
111  static void encodeOligo(
112  const std::string& sequence, uint32_t k_mer_length,
113  const std::string& allowed_characters,
114  std::vector< std::pair<int32_t, float64_t> >& values);
115 
 /** Encodes all sequences with the encodeOligo function and stores them in
  * 'encoded_sequences'.
  */
123  static void getSequences(
124  const std::vector<std::string>& sequences,
125  uint32_t k_mer_length, const std::string& allowed_characters,
126  std::vector< std::vector< std::pair<int32_t, float64_t> > >& encoded_sequences);
127 
 /** NOTE(review): the opening line of this declaration (original line 143) is
  * not shown in the listing. The parameter list matches the member the page
  * index calls `float64_t kernelOligoFast(...)`, described there as returning
  * the value of the oligo kernel for sequences 'x' and 'y'. max_distance
  * defaults to -1; its meaning is not visible here.
  */
144  const std::vector< std::pair<int32_t, float64_t> >& x,
145  const std::vector< std::pair<int32_t, float64_t> >& y,
146  int32_t max_distance = -1);
147 
 /** NOTE(review): the opening line of this declaration (original line 155) is
  * not shown in the listing. The parameter list matches the member the page
  * index calls `float64_t kernelOligo(...)`, described there as returning the
  * value of the oligo kernel for sequences 'x' and 'y'.
  */
156  const std::vector< std::pair<int32_t, float64_t> >& x,
157  const std::vector< std::pair<int32_t, float64_t> >& y);
158 
159 
160  private:
 /** Takes a sequence length and returns nothing.
  *
  * NOTE(review): presumably precomputes a table of exponential values for
  * sequences up to that length -- confirm against the implementation.
  */
171  void getExpFunctionCache(uint32_t sequence_length);
172 
 /** Ordering predicate on (int32_t, float64_t) pairs: returns true when the
  * second component of `a` is strictly less than that of `b`. The first
  * component is not compared. Both pairs are taken by value.
  */
173  static inline bool cmpOligos_(std::pair<int32_t, float64_t> a,
174  std::pair<int32_t, float64_t> b )
175  {
176  return (a.second < b.second);
177  }
178 
 /** Private no-argument overload, distinct from the public init(l, r).
  *
  * NOTE(review): its body is not visible here; presumably sets up member
  * defaults -- confirm.
  */
179  void init();
180 
181  protected:
 /** NOTE(review): presumably the k-mer length -- confirm. Original lines
  * 184-187 are not shown in the listing, so further members may be declared
  * there (the page index lists `SGVector< float64_t > gauss_table`).
  */
183  int32_t k;
188 };
189 }
190 #endif // _OLIGOSTRINGKERNEL_H_
virtual bool init(CFeatures *l, CFeatures *r)
virtual EKernelType get_kernel_type()
EKernelType
Definition: Kernel.h:57
float64_t kernelOligo(const std::vector< std::pair< int32_t, float64_t > > &x, const std::vector< std::pair< int32_t, float64_t > > &y)
returns the value of the oligo kernel for sequences 'x' and 'y'
virtual float64_t compute(int32_t x, int32_t y)
This class offers access to the Oligo Kernel introduced by Meinicke et al. in 2004.
SGVector< float64_t > gauss_table
int32_t cache_size
cache_size in MB
Definition: Kernel.h:1048
static void encodeOligo(const std::string &sequence, uint32_t k_mer_length, const std::string &allowed_characters, std::vector< std::pair< int32_t, float64_t > > &values)
encodes the signals of the sequence
double float64_t
Definition: common.h:50
static void getSequences(const std::vector< std::string > &sequences, uint32_t k_mer_length, const std::string &allowed_characters, std::vector< std::vector< std::pair< int32_t, float64_t > > > &encoded_sequences)
encodes all sequences with the encodeOligo function and stores them in 'encoded_sequences' ...
virtual const char * get_name() const
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
Template class StringKernel is the base class of all string kernels.
Definition: StringKernel.h:26
float64_t kernelOligoFast(const std::vector< std::pair< int32_t, float64_t > > &x, const std::vector< std::pair< int32_t, float64_t > > &y, int32_t max_distance=-1)
returns the value of the oligo kernel for sequences 'x' and 'y'

SHOGUN Machine Learning Toolbox - Documentation