SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ImplicitWeightedSpecFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2009 Soeren Sonnenburg
8  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
12 #include <shogun/io/SGIO.h>
13 
14 using namespace shogun;
15 
17  :CDotFeatures()
18 {
19  SG_UNSTABLE("CImplicitWeightedSpecFeatures::"
20  "CImplicitWeightedSpecFeatures()", "\n");
21 
22  strings = NULL;
23  normalization_factors = NULL;
24  num_strings = 0;
25  alphabet_size = 0;
26 
27  degree = 0;
28  spec_size = 0;
29  spec_weights = 0;
30 }
31 
33 {
34  ASSERT(str)
35  strings=str;
38  spec_weights=NULL;
41  degree=str->get_order();
43 
44  SG_DEBUG("WEIGHTED SPEC alphasz=%d, size=%d, num_str=%d\n", alphabet_size,
46 
47  if (normalize)
49 }
50 
52 {
53  float64_t* factors=SG_MALLOC(float64_t, num_strings);
54 
55  for (int32_t i=0; i<num_strings; i++)
56  factors[i]=1.0/CMath::sqrt(dot(i, this, i));
57 
58  normalization_factors=factors;
59  //CMath::display_vector(normalization_factors, num_strings, "n");
60 }
61 
63 {
64  SG_FREE(spec_weights);
65  spec_weights=SG_MALLOC(float64_t, degree);
66 
67  int32_t i;
68  float64_t sum=0;
69  spec_size=0;
70 
71  for (i=0; i<degree; i++)
72  {
74  spec_weights[i]=degree-i;
75  sum+=spec_weights[i];
76  }
77  for (i=0; i<degree; i++)
79 
80  return spec_weights!=NULL;
81 }
82 
84 {
85  ASSERT(d==degree)
86 
87  SG_FREE(spec_weights);
88  spec_weights=SG_MALLOC(float64_t, degree);
89  for (int32_t i=0; i<degree; i++)
90  spec_weights[i]=CMath::sqrt(w[i]);
91  return true;
92 }
93 
95  num_strings(orig.num_strings),
96  alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
97 {
99  SG_REF(strings);
100 }
101 
103 {
104  SG_UNREF(strings);
105  SG_FREE(spec_weights);
106  SG_FREE(normalization_factors);
107 }
108 
109 float64_t CImplicitWeightedSpecFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
110 {
111  ASSERT(df)
115 
116  ASSERT(vec_idx1 < num_strings)
117  ASSERT(vec_idx2 < sf->get_num_vectors())
118 
119  int32_t len1=-1;
120  int32_t len2=-1;
121  bool free_vec1;
122  bool free_vec2;
123  uint16_t* vec1=strings->get_feature_vector(vec_idx1, len1, free_vec1);
124  uint16_t* vec2=sf->strings->get_feature_vector(vec_idx2, len2, free_vec2);
125 
126  float64_t result=0;
127  uint8_t mask=0;
128 
129  for (int32_t d=0; d<degree; d++)
130  {
131  mask = mask | (1 << (degree-d-1));
132  uint16_t masked=strings->get_masked_symbols(0xffff, mask);
133 
134  int32_t left_idx=0;
135  int32_t right_idx=0;
136  float64_t weight=spec_weights[d]*spec_weights[d];
137 
138  while (left_idx < len1 && right_idx < len2)
139  {
140  uint16_t lsym=vec1[left_idx] & masked;
141  uint16_t rsym=vec2[right_idx] & masked;
142 
143  if (lsym == rsym)
144  {
145  int32_t old_left_idx=left_idx;
146  int32_t old_right_idx=right_idx;
147 
148  while (left_idx<len1 && (vec1[left_idx] & masked) ==lsym)
149  left_idx++;
150 
151  while (right_idx<len2 && (vec2[right_idx] & masked) ==lsym)
152  right_idx++;
153 
154  result+=weight*(left_idx-old_left_idx)*(right_idx-old_right_idx);
155  }
156  else if (lsym<rsym)
157  left_idx++;
158  else
159  right_idx++;
160  }
161  }
162 
163  strings->free_feature_vector(vec1, vec_idx1, free_vec1);
164  sf->strings->free_feature_vector(vec2, vec_idx2, free_vec2);
165 
167  return result*normalization_factors[vec_idx1]*normalization_factors[vec_idx2];
168  else
169  return result;
170 }
171 
172 float64_t CImplicitWeightedSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
173 {
174  ASSERT(vec2_len == spec_size)
175  ASSERT(vec_idx1 < num_strings)
176 
177  float64_t result=0;
178  int32_t len1=-1;
179  bool free_vec1;
180  uint16_t* vec1=strings->get_feature_vector(vec_idx1, len1, free_vec1);
181 
182  if (vec1 && len1>0)
183  {
184  for (int32_t j=0; j<len1; j++)
185  {
186  uint8_t mask=0;
187  int32_t offs=0;
188  uint16_t v=*vec1++;
189 
190  for (int32_t d=0; d<degree; d++)
191  {
192  mask = mask | (1 << (degree-d-1));
193  int32_t idx=strings->get_masked_symbols(v, mask);
194  idx=strings->shift_symbol(idx, degree-d-1);
195  result += vec2[offs + idx]*spec_weights[d];
196  offs+=strings->shift_offset(1,d+1);
197  }
198  }
199 
200  strings->free_feature_vector(vec1, vec_idx1, free_vec1);
201 
203  result*=normalization_factors[vec_idx1];
204  }
205  else
206  SG_ERROR("huh?\n")
207 
208  return result;
209 }
210 
211 void CImplicitWeightedSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
212 {
213  int32_t len1=-1;
214  bool free_vec1;
215  uint16_t* vec=strings->get_feature_vector(vec_idx1, len1, free_vec1);
216 
218  alpha*=normalization_factors[vec_idx1];
219 
220  if (vec && len1>0)
221  {
222  for (int32_t j=0; j<len1; j++)
223  {
224  uint8_t mask=0;
225  int32_t offs=0;
226  for (int32_t d=0; d<degree; d++)
227  {
228  mask = mask | (1 << (degree-d-1));
229  int32_t idx=strings->get_masked_symbols(vec[j], mask);
230  idx=strings->shift_symbol(idx, degree-d-1);
231  if (abs_val)
232  vec2[offs + idx] += CMath::abs(alpha*spec_weights[d]);
233  else
234  vec2[offs + idx] += alpha*spec_weights[d];
235  offs+=strings->shift_offset(1,d+1);
236  }
237  }
238  }
239 
240  strings->free_feature_vector(vec, vec_idx1, free_vec1);
241 }
242 
244 {
245  return new CImplicitWeightedSpecFeatures(*this);
246 }
247 
249 {
250  return spec_size;
251 }
252 
254 {
255  if (vector_index>=num_strings)
256  {
257  SG_ERROR("Index out of bounds (number of strings %d, you "
258  "requested %d)\n", num_strings, vector_index);
259  }
260 
261  wspec_feature_iterator* it=SG_MALLOC(wspec_feature_iterator, 1);
262  it->vec= strings->get_feature_vector(vector_index, it->vlen, it->vfree);
263  it->vidx=vector_index;
264 
265  it->offs=0;
266  it->d=0;
267  it->j=0;
268  it->mask=0;
269  it->alpha=normalization_factors[vector_index];
270 
271  return it;
272 }
273 
274 bool CImplicitWeightedSpecFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
275 {
276  wspec_feature_iterator* it=(wspec_feature_iterator*) iterator;
277 
278  if (it->d>=degree)
279  {
280  if (it->j < it->vlen-1)
281  {
282  it->j++;
283  it->d=0;
284  it->mask=0;
285  it->offs=0;
286  }
287  else
288  return false;
289  }
290 
291  int32_t d=it->d;
292 
293  it->mask = it->mask | (1 << (degree-d-1));
294  int32_t idx=strings->get_masked_symbols(it->vec[it->j], it->mask);
295  idx=strings->shift_symbol(idx, degree-d-1);
296  value=it->alpha*spec_weights[d];
297  index=it->offs + idx;
298  it->offs+=strings->shift_offset(1,d+1);
299 
300  it->d=d+1;
301  return true;
302 }
303 
305 {
306  ASSERT(iterator)
307  wspec_feature_iterator* it=(wspec_feature_iterator*) iterator;
308  strings->free_feature_vector(it->vec, it->vidx, it->vfree);
309  SG_FREE(it);
310 }
311 
312 
314 {
315  int32_t vlen=-1;
316  bool free_vec;
317  uint16_t* vec1=strings->get_feature_vector(num, vlen, free_vec);
318  strings->free_feature_vector(vec1, num, free_vec);
319  int32_t nnz=0;
320  for (int32_t i=1; i<=degree; i++)
321  nnz+=CMath::min(CMath::pow(alphabet_size,i), vlen);
322  return nnz;
323 }
324 
326 {
327  return F_UNKNOWN;
328 }
329 
331 {
332  return C_WEIGHTEDSPEC;
333 }
334 
336 {
337  return num_strings;
338 }

SHOGUN Machine Learning Toolbox - Documentation