SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StreamingVwFeatures.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
15 
17 
18 using namespace shogun;
19 
21 {
22  init();
24 }
25 
27  bool is_labelled, int32_t size)
29 {
30  init(file, is_labelled, size);
32 }
33 
35  bool is_labelled, int32_t size)
37 {
38  init(file, is_labelled, size);
40 }
41 
43 {
44  if (parser.is_running())
45  parser.end_parser();
46  SG_UNREF(env);
47 }
48 
50 {
51  return new CStreamingVwFeatures(*this);
52 }
53 
55 {
56  parser.set_read_vector(&CStreamingFile::get_vector);
57 }
58 
60 {
61  parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
62 }
63 
65 {
67  {
69  parser.exit_parser();
70  parser.init(working_file, has_labels, parser.get_ring_size());
71  parser.set_free_vector_after_release(false);
72  parser.start_parser();
73  }
74  else
75  SG_ERROR("The input cannot be reset! Please use 1 pass.\n")
76 }
77 
79 {
80  SG_REF(env);
81  return env;
82 }
83 
85 {
86  env = vw_env;
87  SG_REF(env);
88 }
89 
91 {
92  int32_t dim = 1 << env->num_bits;
93  if (dim > len)
94  {
95  vec = SG_REALLOC(float32_t, vec, len, dim);
96  memset(&vec[len], 0, (dim-len) * sizeof(float32_t));
97  len = dim;
98  }
99 }
100 
102 {
103  int32_t dim = 1 << env->num_bits;
104  if (dim > len)
105  {
106  vec = SG_REALLOC(float64_t, vec, len, dim);
107  memset(&vec[len], 0, (dim-len) * sizeof(float64_t));
108  len = dim;
109  }
110 }
111 
113 {
114  float32_t wprime = 0;
115  if (gravity < fabsf(w))
116  wprime = CMath::sign(w)*(fabsf(w) - gravity);
117  return wprime;
118 }
119 
121 {
122  return current_length;
123 }
124 
126 {
127  if (current_example)
128  return 1;
129  else
130  return 0;
131 }
132 
134 {
135  return F_DREAL;
136 }
137 
138 void CStreamingVwFeatures::init()
139 {
140  working_file=NULL;
141  seekable=false;
142  current_length=-1;
143  current_example=NULL;
144  env=NULL;
145 
146  example_count = 0;
147 }
148 
149 void CStreamingVwFeatures::init(CStreamingVwFile* file, bool is_labelled, int32_t size)
150 {
151  init();
152  has_labels = is_labelled;
153  working_file = file;
154  parser.init(file, is_labelled, size);
155  parser.set_free_vector_after_release(false);
156  seekable=false;
157 
158  // Get environment from the StreamingVwFile
159  env = ((CStreamingVwFile*) file)->get_env();
160  SG_REF(env);
161 }
162 
163 void CStreamingVwFeatures::init(CStreamingVwCacheFile* file, bool is_labelled, int32_t size)
164 {
165  init();
166  has_labels = is_labelled;
167  working_file = file;
168  parser.init(file, is_labelled, size);
169  parser.set_free_vector_after_release(false);
170  seekable=true;
171 
172  // Get environment from the StreamingVwFile
173  env = ((CStreamingVwCacheFile*) file)->get_env();
174  SG_REF(env);
175 }
176 
177 void CStreamingVwFeatures::setup_example(VwExample* ae)
178 {
179  ae->pass = env->passes_complete;
180  ae->num_features = 0;
181  ae->total_sum_feat_sq = 1;
183  ae->global_weight = ae->ld->weight;
184  env->t += ae->global_weight;
185  ae->example_t = env->t;
186 
187  // If some namespaces should be ignored, remove them
188  if (env->ignore_some)
189  {
190  for (vw_size_t* i = ae->indices.begin; i != ae->indices.end; i++)
191  if (env->ignore[*i])
192  {
193  ae->atomics[*i].erase();
194  memmove(i,i+1,(ae->indices.end - (i+1))*sizeof(vw_size_t));
195  ae->indices.end--;
196  i--;
197  }
198  }
199 
200  // Add constant feature
201  vw_size_t constant_namespace = 128;
202  VwFeature temp = {1,constant_hash & env->mask};
203  ae->indices.push(constant_namespace);
204  ae->atomics[constant_namespace].push(temp);
205  ae->sum_feat_sq[constant_namespace] = 0;
206 
207  if(env->stride != 1)
208  {
209  // Make room for per-feature information.
210  vw_size_t stride = env->stride;
211  for (vw_size_t* i = ae->indices.begin; i != ae->indices.end; i++)
212  for(VwFeature* j = ae->atomics[*i].begin; j != ae->atomics[*i].end; j++)
213  j->weight_index = j->weight_index*stride;
214  }
215 
216  for (vw_size_t* i = ae->indices.begin; i != ae->indices.end; i++)
217  {
218  ae->num_features += ae->atomics[*i].end - ae->atomics[*i].begin;
219  ae->total_sum_feat_sq += ae->sum_feat_sq[*i];
220  }
221 
222  // For quadratic features
223  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
224  {
225  char* i = env->pairs.get_element(k);
226 
227  ae->num_features
228  += (ae->atomics[(int32_t)(i[0])].end - ae->atomics[(int32_t)(i[0])].begin)
229  *(ae->atomics[(int32_t)(i[1])].end - ae->atomics[(int32_t)(i[1])].begin);
230 
231  ae->total_sum_feat_sq += ae->sum_feat_sq[(int32_t)(i[0])]*ae->sum_feat_sq[(int32_t)(i[1])];
232  }
233 }
234 
236 {
237  if (!parser.is_running())
238  parser.start_parser();
239 }
240 
242 {
243  parser.end_parser();
244 }
245 
247 {
248  bool ret_value;
249  ret_value = (bool) parser.get_next_example(current_example,
251  current_label);
252  if (current_length < 1)
253  return false;
254 
255  if (ret_value)
256  setup_example(current_example);
257  else
258  return false;
259 
262 
263  return ret_value;
264 }
265 
267 {
268  return current_example;
269 }
270 
272 {
274 
275  return current_label;
276 }
277 
279 {
280  env->example_number++;
282 
283  if (current_example->ld->label == FLT_MAX)
284  env->weighted_labels += 0;
285  else
287 
290 
292  parser.finalize_example();
293 }
294 
296 {
297  return current_length;
298 }
299 
301 {
303  return CMath::INFTY;
304 }
305 
307 {
308  float32_t ret = 0.;
309  for (vw_size_t* i = ex->indices.begin; i!= ex->indices.end; i++)
310  {
311  for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
312  ret += vec2[f->weight_index & env->thread_mask] * f->x;
313  }
314  return ret;
315 }
316 
318 {
319  return dense_dot(current_example, vec2);
320 }
321 
323 {
324  float32_t ret = 0.;
325  for (int32_t i = 0; i < vec1->num_feat_entries; i++)
326  ret += vec1->features[i].entry * vec2[vec1->features[i].feat_index & env->mask];
327 
328  return ret;
329 }
330 
332 {
333  float32_t ret = 0.;
334  for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
335  {
336  for (VwFeature* f = ex->atomics[*i].begin; f!= ex->atomics[*i].end; f++)
337  {
338  float32_t w = vec2[f->weight_index & env->thread_mask];
339  float32_t wprime = real_weight(w,gravity);
340  ret += wprime*f->x;
341  }
342  }
343 
344  return ret;
345 }
346 
347 void CStreamingVwFeatures::add_to_dense_vec(float32_t alpha, VwExample* &ex, float32_t* vec2, int32_t vec2_len, bool abs_val)
348 {
349  if (abs_val)
350  {
351  for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
352  {
353  for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
354  vec2[f->weight_index & env->thread_mask] += alpha * abs(f->x);
355  }
356  }
357  else
358  {
359  for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
360  {
361  for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
362  vec2[f->weight_index & env->thread_mask] += alpha * f->x;
363  }
364  }
365 }
366 
367 void CStreamingVwFeatures::add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)
368 {
369  add_to_dense_vec(alpha, current_example, vec2, vec2_len, abs_val);
370 }
371 
373 {
374  return current_length;
375 }
376 
378 {
379  return C_STREAMING_VW;
380 }

SHOGUN Machine Learning Toolbox - Documentation