SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
ExplicitSpecFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2009 Soeren Sonnenburg
8  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
13 #include <shogun/io/SGIO.h>
14 
15 using namespace shogun;
16 
18 {
19  SG_UNSTABLE("CExplicitSpecFeatures::CExplicitSpecFeatures()",
20  "\n");
21 
22  use_normalization = false;
23  num_strings = 0;
24  alphabet_size = 0;
25 
26  spec_size = 0;
27  k_spectrum = NULL;
28 }
29 
30 
32 {
33  ASSERT(str)
34 
35  use_normalization=normalize;
37  spec_size = str->get_num_symbols();
38 
40 
41  SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings)
42 }
43 
45  num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
46 {
47  k_spectrum= SG_MALLOC(float64_t*, num_strings);
48  for (int32_t i=0; i<num_strings; i++)
50 }
51 
53 {
55 }
56 
58 {
59  return spec_size;
60 }
61 
// Dot product between vector vec_idx1 of this object and vector vec_idx2 of
// another feature object.
//
// vec_idx1  row index into this object's k_spectrum (only the upper bound is asserted)
// df        other feature object; asserted to be non-NULL
// vec_idx2  row index into sf->k_spectrum (only the upper bound is asserted)
// returns   CMath::dot over the first spec_size entries of both rows
//
// NOTE(review): listing lines 65-67 are absent from this extract, so the
// declaration of `sf` is not visible here — presumably `df` is type-checked
// and cast to CExplicitSpecFeatures* there; confirm against the real source.
62 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
63 {
64  ASSERT(df)
68 
69  ASSERT(vec_idx1 < num_strings)
70  ASSERT(vec_idx2 < sf->num_strings)
71  float64_t* vec1=k_spectrum[vec_idx1];
72  float64_t* vec2=sf->k_spectrum[vec_idx2];
73 
   // Both rows are read with this object's spec_size as the length.
74  return CMath::dot(vec1, vec2, spec_size);
75 }
76 
77 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
78 {
79  ASSERT(vec2_len == spec_size)
80  ASSERT(vec_idx1 < num_strings)
81  float64_t* vec1=k_spectrum[vec_idx1];
82  float64_t result=0;
83 
84  for (int32_t i=0; i<spec_size; i++)
85  result+=vec1[i]*vec2[i];
86 
87  return result;
88 }
89 
90 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
91 {
92  ASSERT(vec2_len == spec_size)
93  ASSERT(vec_idx1 < num_strings)
94  float64_t* vec1=k_spectrum[vec_idx1];
95 
96  if (abs_val)
97  {
98  for (int32_t i=0; i<spec_size; i++)
99  vec2[i]+=alpha*CMath::abs(vec1[i]);
100  }
101  else
102  {
103  for (int32_t i=0; i<spec_size; i++)
104  vec2[i]+=alpha*vec1[i];
105  }
106 }
107 
109 {
110  k_spectrum= SG_MALLOC(float64_t*, num_strings);
111 
112  for (int32_t i=0; i<num_strings; i++)
113  {
114  k_spectrum[i]=SG_MALLOC(float64_t, spec_size);
115  memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);
116 
117  int32_t len=0;
118  bool free_fv;
119  uint16_t* fv=str->get_feature_vector(i, len, free_fv);
120 
121  for (int32_t j=0; j<len; j++)
122  k_spectrum[i][fv[j]]++;
123 
124  str->free_feature_vector(fv, i, free_fv);
125 
126  if (use_normalization)
127  {
128  float64_t n=0;
129  for (int32_t j=0; j<spec_size; j++)
130  n+=CMath::sq(k_spectrum[i][j]);
131 
132  n=CMath::sqrt(n);
133 
134  for (int32_t j=0; j<spec_size; j++)
135  k_spectrum[i][j]/=n;
136  }
137  }
138 }
139 
141 {
142  for (int32_t i=0; i<num_strings; i++)
143  SG_FREE(k_spectrum[i]);
144 
145  SG_FREE(k_spectrum);
146  k_spectrum=NULL;
147 }
148 
150 {
151  return new CExplicitSpecFeatures(*this);
152 }
153 
154 
155 
157 {
159  return NULL;
160 }
161 
// Feature-iterator step. As visible in this listing, the body does not touch
// index, value or iterator and unconditionally returns false.
// NOTE(review): listing line 164 is absent from this extract — presumably an
// SG_NOTIMPLEMENTED marker; confirm against the real source.
162 bool CExplicitSpecFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
163 {
165  return false;
166 }
167 
169 {
171 }
172 
174 {
176  return 0;
177 }
178 
180 {
181  return F_UNKNOWN;
182 }
183 
185 {
186  return C_SPEC;
187 }
188 
190 {
191  return num_strings;
192 }
SGVector< ST > get_feature_vector(int32_t num)
void obtain_kmer_spectrum(CStringFeatures< uint16_t > *str)
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
virtual bool get_next_feature(int32_t &index, float64_t &value, void *iterator)
virtual int32_t get_dim_feature_space() const
virtual int32_t get_num_vectors() const
static T sq(T x)
Definition: Math.h:450
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
virtual CFeatures * duplicate() const
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
Features that support dot products among other operations.
Definition: DotFeatures.h:44
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
#define ASSERT(x)
Definition: SGIO.h:201
shogun vector
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
double float64_t
Definition: common.h:50
virtual EFeatureClass get_feature_class() const =0
virtual int32_t get_num_vectors() const
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
Features that compute the Spectrum Kernel feature space explicitly.
virtual EFeatureType get_feature_type() const
EFeatureType
shogun feature type
Definition: FeatureTypes.h:19
virtual void free_feature_iterator(void *iterator)
#define SG_DEBUG(...)
Definition: SGIO.h:107
all classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)
static float32_t sqrt(float32_t x)
Definition: Math.h:459
virtual int32_t get_nnz_features_for_vector(int32_t num)
#define SG_UNSTABLE(func,...)
Definition: SGIO.h:132
virtual EFeatureType get_feature_type() const =0
virtual EFeatureClass get_feature_class() const
static T abs(T a)
Definition: Math.h:179
virtual void * get_feature_iterator(int32_t vector_index)

SHOGUN Machine Learning Toolbox - Documentation