SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
VowpalWabbit.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
16 
17 #include <algorithm>
18 #include <fcntl.h>
19 #include <shogun/lib/Signal.h>
20 
21 #ifdef _WIN32
22 #include <io.h>
23 #endif
24 
25 using namespace std;
26 using namespace shogun;
27 
// Default constructor: zero the regressor/learner pointers, then run the
// shared init() path with no feature source.
// NOTE(review): doc-listing fragment — original line 29 (presumably the
// base-class initializer list) is missing from this view; verify upstream.
28 CVowpalWabbit::CVowpalWabbit()
30 {
31  reg=NULL;
32  learner=NULL;
33  init(NULL);
34 }
35 
// Constructor body taking a feature stream. NOTE(review): the signature
// lines (orig 36-37) are missing from this doc listing — presumably
// CVowpalWabbit(CStreamingVwFeatures* feat); verify against the header.
// Pointers are nulled first so init() starts from a clean state.
38 {
39  reg=NULL;
40  learner=NULL;
41  init(feat);
42 }
43 
// Copy-style constructor body (signature lines missing from this doc
// listing; presumably CVowpalWabbit(CVowpalWabbit* vw)). Shares the source
// machine's feature stream and environment, builds a fresh regressor,
// copies scalar settings, then deep-copies the weight vector so the two
// machines do not alias weight storage.
46 {
47  features = vw->features;
48  env = vw->env;
49  reg = new CVwRegressor(env);
// Take references on the shared environment and the new regressor.
50  SG_REF(env);
51  SG_REF(reg);
52 
53  quiet = vw->quiet;
54  no_training = vw->no_training;
55  dump_interval = vw->dump_interval;
56  sum_loss_since_last_dump = 0.;
57  reg_name = vw->reg_name;
58  reg_dump_text = vw->reg_dump_text;
59  save_predictions = vw->save_predictions;
// NOTE(review): the prediction fd is shared, not duplicated — both
// objects will write to the same descriptor; confirm this is intended.
60  prediction_fd = vw->prediction_fd;
61 
// Adopt thread-0's weight buffer and null the regressor's slot so the
// buffer is not freed twice; then copy the source machine's weights in.
62  w = reg->weight_vectors[0];
63  reg->weight_vectors[0] = NULL;
64  copy(vw->w, vw->w+vw->w_dim, w);
65  w_dim = vw->w_dim;
66  bias = vw->bias;
67 }
68 
// Destructor body (signature line missing from this doc listing).
// Releases references taken on the environment and regressor.
// NOTE(review): original line 73 is absent here — presumably
// SG_UNREF(learner); verify against the repository source.
70 {
71  SG_UNREF(env);
72  SG_UNREF(reg);
74 }
75 
// Weight re-initialization body (signature line missing from this doc
// listing; presumably reinitialize_weights()). Frees any existing weight
// storage, re-runs the regressor's init to allocate fresh weights, then
// adopts thread-0's vector (the regressor's slot is nulled so the buffer
// is not double-freed).
77 {
78  if (reg->weight_vectors)
79  {
80  if (reg->weight_vectors[0])
81  SG_FREE(reg->weight_vectors[0]);
82  SG_FREE(reg->weight_vectors);
83  }
84 
85  reg->init(env);
86  w = reg->weight_vectors[0];
87  reg->weight_vectors[0] = NULL;
88 }
89 
// Enable/disable adaptive (per-weight) learning rates.
90 void CVowpalWabbit::set_adaptive(bool adaptive_learning)
91 {
92  if (adaptive_learning)
93  {
// Adaptive mode stores an accumulator next to each weight, hence a
// stride of 2; power_t is zeroed so the adaptive rate takes over.
94  env->adaptive = true;
95  env->set_stride(2);
96  env->power_t = 0.;
// NOTE(review): original line 97 is missing from this doc listing —
// likely a regressor re-init to reallocate for the new stride; verify.
98  }
99  else
100  env->adaptive = false;
101 }
102 
103 void CVowpalWabbit::set_exact_adaptive_norm(bool exact_adaptive)
104 {
105  if (exact_adaptive)
106  {
107  set_adaptive(true);
108  env->exact_adaptive_norm = true;
109  }
110  else
111  env->exact_adaptive_norm = false;
112 }
113 
114 void CVowpalWabbit::load_regressor(char* file_name)
115 {
116  reg->load_regressor(file_name);
117  w = reg->weight_vectors[0];
118  reg->weight_vectors[0] = NULL;
119  w_dim = 1 << env->num_bits;
120 }
121 
122 void CVowpalWabbit::set_regressor_out(char* file_name, bool is_text)
123 {
124  reg_name = file_name;
125  reg_dump_text = is_text;
126 }
127 
// Body of set_prediction_out (signature line missing from this doc
// listing). Enables prediction saving and opens the output file,
// creating/truncating it with rw-rw-rw- permissions; errors out if the
// file cannot be opened.
129 {
130  save_predictions = true;
131  prediction_fd = open(file_name, O_CREAT|O_TRUNC|O_WRONLY, 0666);
132  if (prediction_fd < 0)
133  SG_SERROR("Unable to open prediction file %s for writing!\n", file_name)
134 }
135 
// Body of add_quadratic_pair (signature line missing from this doc
// listing). Registers a namespace pair whose features are crossed to form
// quadratic features during prediction and updates.
137 {
138  env->pairs.push_back(pair);
139 }
140 
// Body of train_machine (signature line missing from this doc listing;
// presumably bool CVowpalWabbit::train_machine(CFeatures* feat)). Runs the
// online learning loop over the streaming features, applies per-pass eta
// decay, optionally applies final L1 truncation, and dumps the regressor
// if an output name was set. Always returns true.
// NOTE(review): several original lines are missing from this listing
// (146, 166-168, 191, 194-195, 203) — including the pass loop header and
// the 'gravity' definition used below; verify against the repository.
142 {
143  ASSERT(features || feat)
// Re-initialize if a different feature object was passed in.
144  if (feat && (features != (CStreamingVwFeatures*) feat))
145  {
147  init((CStreamingVwFeatures*) feat);
148  }
149 
150  set_learner();
151 
152  VwExample* example = NULL;
153  vw_size_t current_pass = 0;
154 
155  const char* header_fmt = "%-10s %-10s %8s %8s %10s %8s %8s\n";
156 
157  if (!quiet)
158  {
159  SG_SPRINT(header_fmt,
160  "average", "since", "example", "example",
161  "current", "current", "current");
162  SG_SPRINT(header_fmt,
163  "loss", "last", "counter", "weight", "label", "predict", "features");
164  }
165 
169  {
170  while (features->get_next_example())
171  {
172  example = features->get_example();
173 
174  // Check if we shouldn't train (generally used for cache creation)
175  if (!no_training)
176  {
// Decay the learning rate once per completed pass over the data.
177  if (example->pass != current_pass)
178  {
179  env->eta *= env->eta_decay_rate;
180  current_pass = example->pass;
181  }
182 
// Predict first (also computes loss and the update size eta_round),
// then let the learner apply the weight update.
183  predict_and_finalize(example);
184 
185  learner->train(example, example->eta_round);
186  example->eta_round = 0.;
187 
188  output_example(example);
189  }
190 
192  }
193  env->passes_complete++;
196  }
197  features->end_parser();
198 
// Final L1 truncation of every weight (one per stride slot).
199  if (env->l1_regularization > 0.)
200  {
201  uint32_t length = 1 << env->num_bits;
202  vw_size_t stride = env->stride;
204  for (uint32_t i = 0; i < length; i++)
205  reg->weight_vectors[0][stride*i] = real_weight(reg->weight_vectors[0][stride*i], gravity);
206  }
207 
208  if (reg_name != NULL)
209  reg->dump_regressor(reg_name, reg_dump_text);
210 
211  return true;
212 }
213 
// Body of predict_and_finalize (signature line missing from this doc
// listing; declared as float32_t predict_and_finalize(VwExample* ex)).
// Computes the raw prediction (L1-truncated when L1 is active), clamps it
// into the observed label range, and — for labeled examples — computes the
// loss and the update magnitude eta_round used by the learner.
// NOTE(review): original line 231 is missing — it is presumably the
// 'if (env->exact_adaptive_norm)' condition guarding the first branch.
215 {
216  float32_t prediction;
217  if (env->l1_regularization != 0.)
218  prediction = inline_l1_predict(ex);
219  else
220  prediction = inline_predict(ex);
221 
222  ex->final_prediction = 0;
223  ex->final_prediction += prediction;
// Clamp into [min_label, max_label]; NaN becomes 0.5.
224  ex->final_prediction = finalize_prediction(ex->final_prediction);
225  float32_t t = ex->example_t;
226 
// FLT_MAX is the sentinel for "unlabeled": only labeled examples
// contribute loss and produce a weight update.
227  if (ex->ld->label != FLT_MAX)
228  {
229  ex->loss = reg->get_loss(ex->final_prediction, ex->ld->label) * ex->ld->weight;
230  float64_t update = 0.;
232  {
// Exact-norm path: scale eta by the exact gradient norm, normalized
// by the sum of absolute feature values.
233  float32_t sum_abs_x = 0.;
234  float32_t exact_norm = compute_exact_norm(ex, sum_abs_x);
235  update = (env->eta * exact_norm)/sum_abs_x;
236  env->update_sum += update;
237  ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, exact_norm);
238  }
239  else
240  {
// Standard path: eta decayed by t^power_t, weighted by the example.
241  update = (env->eta)/pow(t, env->power_t) * ex->ld->weight;
242  ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, ex->total_sum_feat_sq);
243  }
244  env->update_sum += update;
245  }
246 
247  return prediction;
248 }
249 
250 void CVowpalWabbit::init(CStreamingVwFeatures* feat)
251 {
252  features = feat;
253 
254  if (feat)
255  env = feat->get_env();
256  else
257  {
258  env=new CVwEnvironment();
259  SG_REF(env);
260  }
261 
262  reg = new CVwRegressor(env);
263  SG_REF(reg);
264 
265  quiet = true;
266  no_training = false;
267  dump_interval = exp(1.);
268  sum_loss_since_last_dump = 0.;
269  reg_name = NULL;
270  reg_dump_text = true;
271  save_predictions = false;
272  prediction_fd = -1;
273 
274  w = reg->weight_vectors[0];
275  reg->weight_vectors[0] = NULL;
276  w_dim = 1 << env->num_bits;
277  bias = 0.;
278 }
279 
// Body of set_learner (signature line missing from this doc listing).
// Chooses the weight-update strategy based on the environment.
281 {
282  if (env->adaptive)
// NOTE(review): original lines 283 and 285 are missing from this listing —
// they presumably construct a CVwAdaptiveLearner and a
// CVwNonAdaptiveLearner respectively; verify against the repository.
284  else
286  SG_REF(learner);
287 }
288 
// Raw prediction with L1 truncation applied to each weight as it is read
// ("gravity" = l1_regularization * update_sum). Linear term first, then
// the contribution of all registered quadratic (crossed) namespaces.
// NOTE(review): original line 310 is missing from this listing — it is
// presumably the final (gravity) argument of one_pf_quad_predict_trunc.
289 float32_t CVowpalWabbit::inline_l1_predict(VwExample* &ex)
290 {
291  vw_size_t thread_num = 0;
292 
293  float32_t prediction = ex->ld->get_initial();
294 
295  float32_t* weights = reg->weight_vectors[thread_num];
296  vw_size_t thread_mask = env->thread_mask;
297 
298  prediction += features->dense_dot_truncated(weights, ex, env->l1_regularization * env->update_sum);
299 
300  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
301  {
302  char* i = env->pairs.get_element(k);
303 
// Iterate the first namespace of the pair; cross each of its features
// with all features of the second namespace.
304  v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
305  temp.begin = ex->atomics[(int32_t)(i[0])].begin;
306  temp.end = ex->atomics[(int32_t)(i[0])].end;
307  for (; temp.begin != temp.end; temp.begin++)
308  prediction += one_pf_quad_predict_trunc(weights, *temp.begin,
309  ex->atomics[(int32_t)(i[1])], thread_mask,
311  }
312 
313  return prediction;
314 }
315 
316 float32_t CVowpalWabbit::inline_predict(VwExample* &ex)
317 {
318  vw_size_t thread_num = 0;
319  float32_t prediction = ex->ld->initial;
320 
321  float32_t* weights = reg->weight_vectors[thread_num];
322  vw_size_t thread_mask = env->thread_mask;
323  prediction += features->dense_dot(weights, 0);
324 
325  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
326  {
327  char* i = env->pairs.get_element(k);
328 
329  v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
330  temp.begin = ex->atomics[(int32_t)(i[0])].begin;
331  temp.end = ex->atomics[(int32_t)(i[0])].end;
332  for (; temp.begin != temp.end; temp.begin++)
333  prediction += one_pf_quad_predict(weights, *temp.begin,
334  ex->atomics[(int32_t)(i[1])],
335  thread_mask);
336  }
337 
338  return prediction;
339 }
340 
341 float32_t CVowpalWabbit::finalize_prediction(float32_t ret)
342 {
343  if (isnan(ret))
344  return 0.5;
345  if (ret > env->max_label)
346  return env->max_label;
347  if (ret < env->min_label)
348  return env->min_label;
349 
350  return ret;
351 }
352 
353 void CVowpalWabbit::output_example(VwExample* &example)
354 {
355  if (!quiet)
356  {
357  sum_loss_since_last_dump += example->loss;
358  if (env->weighted_examples + example->ld->weight > dump_interval)
359  {
360  print_update(example);
361  dump_interval *= 2;
362  }
363  }
364 
365  if (save_predictions)
366  {
367  float32_t wt = 0.;
368  if (reg->weight_vectors)
369  wt = reg->weight_vectors[0][0];
370 
371  output_prediction(prediction_fd, example->final_prediction, wt * example->global_weight, example->tag);
372  }
373 }
374 
375 void CVowpalWabbit::print_update(VwExample* &ex)
376 {
377  SG_SPRINT("%-10.6f %-10.6f %8lld %8.1f %8.4f %8.4f %8lu\n",
378  (env->sum_loss + ex->loss)/(env->weighted_examples + ex->ld->weight),
379  sum_loss_since_last_dump/(env->weighted_examples + ex->ld->weight - old_weighted_examples),
380  env->example_number + 1,
381  env->weighted_examples + ex->ld->weight,
382  ex->ld->label,
383  ex->final_prediction,
384  (long unsigned int)ex->num_features);
385  sum_loss_since_last_dump = 0.0;
386  old_weighted_examples = env->weighted_examples + ex->ld->weight;
387 }
388 
389 
390 void CVowpalWabbit::output_prediction(int32_t f, float32_t res, float32_t weight, v_array<char> tag)
391 {
392  if (f >= 0)
393  {
394  char temp[30];
395  int32_t num = sprintf(temp, "%f", res);
396  ssize_t t;
397  t = write(f, temp, num);
398  if (t != num)
399  SG_SERROR("Write error!\n")
400 
401  if (tag.begin != tag.end)
402  {
403  temp[0] = ' ';
404  t = write(f, temp, 1);
405  if (t != 1)
406  SG_SERROR("Write error!\n")
407 
408  t = write(f, tag.begin, sizeof(char)*tag.index());
409  if (t != (ssize_t) (sizeof(char)*tag.index()))
410  SG_SERROR("Write error!\n")
411  }
412 
413  temp[0] = '\n';
414  t = write(f, temp, 1);
415  if (t != 1)
416  SG_SERROR("Write error!\n")
417  }
418 }
419 
420 void CVowpalWabbit::set_verbose(bool verbose)
421 {
422  quiet=verbose==false;
423 }
424 
425 
// Body of compute_exact_norm (signature line missing from this doc
// listing; declared as float32_t compute_exact_norm(VwExample*& ex,
// float32_t& sum_abs_x)). Computes the exact adaptive-gradient norm xGx
// over all linear and quadratic features, accumulating sum|x| as it goes.
// NOTE(review): original line 432 is missing — it presumably defines 'g'
// (the squared-gradient scale for this example); verify upstream.
427 {
428  // We must traverse the features in _precisely_ the same order as during training.
429  vw_size_t thread_mask = env->thread_mask;
430  vw_size_t thread_num = 0;
431 
433  if (g == 0) return 0.;
434 
435  float32_t xGx = 0.;
436 
437  float32_t* weights = reg->weight_vectors[thread_num];
// Linear features: each namespace in turn. w_vec[1] is the per-weight
// adaptive accumulator stored next to the weight (stride 2).
438  for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
439  {
440  for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
441  {
442  float32_t* w_vec = &weights[f->weight_index & thread_mask];
443  float32_t t = f->x * CMath::invsqrt(w_vec[1] + g * f->x * f->x);
444  xGx += t * f->x;
445  sum_abs_x += fabsf(f->x);
446  }
447  }
448 
// Quadratic features: cross each registered namespace pair.
449  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
450  {
451  char* i = env->pairs.get_element(k);
452 
453  v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
454  temp.begin = ex->atomics[(int32_t)(i[0])].begin;
455  temp.end = ex->atomics[(int32_t)(i[0])].end;
456  for (; temp.begin != temp.end; temp.begin++)
457  xGx += compute_exact_norm_quad(weights, *temp.begin, ex->atomics[(int32_t)(i[1])], thread_mask, g, sum_abs_x);
458  }
459 
460  return xGx;
461 }
462 
// Body of compute_exact_norm_quad (the first signature line, orig 463, is
// missing from this doc listing — per the declaration it takes
// float32_t* weights, VwFeature& page_feature,
// v_array<VwFeature>& offer_features, then the parameters below).
// Computes the exact adaptive-norm contribution of one page feature
// crossed with every offer feature, accumulating sum|x| for the crossed
// features as a side effect.
464  vw_size_t mask, float32_t g, float32_t& sum_abs_x)
465 {
// Quadratic feature indices are hashed as quadratic_constant*page + offer.
466  vw_size_t halfhash = quadratic_constant * page_feature.weight_index;
467  float32_t xGx = 0.;
468  float32_t update2 = g * page_feature.x * page_feature.x;
469  for (VwFeature* elem = offer_features.begin; elem != offer_features.end; elem++)
470  {
471  float32_t* w_vec = &weights[(halfhash + elem->weight_index) & mask];
// w_vec[1] is the adaptive accumulator stored next to the weight.
472  float32_t t = elem->x * CMath::invsqrt(w_vec[1] + update2 * elem->x * elem->x);
473  xGx += t * elem->x;
474  sum_abs_x += fabsf(elem->x);
475  }
476  return xGx;
477 }
uint32_t weight_index
Hashed index in weight vector.
Definition: vw_example.h:41
uint32_t vw_size_t
vw_size_t typedef to work across platforms
Definition: vw_constants.h:26
CVwRegressor * reg
Regressor.
Definition: VowpalWabbit.h:288
T get_element(int32_t index) const
Definition: DynArray.h:142
Class OnlineLinearMachine is a generic interface for linear machines like classifiers which work thro...
void set_adaptive(bool adaptive_learning)
float64_t weighted_examples
Weighted examples.
T * end
Pointer to last set element in the array.
Definition: v_array.h:160
virtual void load_regressor(char *file_name)
virtual void init(CVwEnvironment *env_to_use=NULL)
Definition: VwRegressor.cpp:55
void set_prediction_out(char *file_name)
T * begin
Pointer to first element of the array.
Definition: v_array.h:157
Class CVwEnvironment is the environment used by VW.
Definition: VwEnvironment.h:41
CLossFunction * loss
Loss function.
Definition: VwRegressor.h:118
void set_stride(vw_size_t new_stride)
vw_size_t num_features
Number of features.
Definition: vw_example.h:89
void(* update)(float *foo, float bar)
Definition: JLCoverTree.h:533
float64_t min_label
Smallest label seen.
Class v_array taken directly from JL's implementation.
float32_t one_pf_quad_predict_trunc(float32_t *weights, VwFeature &f, v_array< VwFeature > &cross_features, vw_size_t mask, float32_t gravity)
Definition: vw_math.cpp:48
int64_t example_number
Example number.
float32_t total_sum_feat_sq
Total sum of square of features.
Definition: vw_example.h:106
Definition: basetag.h:132
float32_t ** weight_vectors
Weight vectors, one array for each thread.
Definition: VwRegressor.h:116
float32_t l1_regularization
Level of L1 regularization.
vw_size_t num_bits
log_2 of the number of features
int32_t get_num_elements() const
Definition: DynArray.h:130
VwAdaptiveLearner uses an adaptive subgradient technique to update weights.
float64_t get_loss(float64_t prediction, float64_t label)
Definition: VwRegressor.h:65
const int32_t quadratic_constant
Constant used while hashing/accessing quadratic features.
Definition: vw_constants.h:29
float32_t eta
Learning rate.
float32_t real_weight(float32_t w, float32_t gravity)
Definition: vw_math.h:35
CVwEnvironment * env
Environment for VW, i.e., globals.
Definition: VowpalWabbit.h:282
float64_t max_label
Largest label seen.
#define SG_REF(x)
Definition: SGObject.h:52
float32_t loss
Loss.
Definition: vw_example.h:95
float32_t label
Label value.
Definition: vw_label.h:92
vw_size_t pass
Pass.
Definition: vw_example.h:91
float32_t compute_exact_norm_quad(float32_t *weights, VwFeature &page_feature, v_array< VwFeature > &offer_features, vw_size_t mask, float32_t g, float32_t &sum_abs_x)
void load_regressor(char *file_name)
v_array< vw_size_t > indices
Array of namespaces.
Definition: vw_example.h:84
virtual void set_learner()
float32_t update_sum
Sum of updates.
float32_t get_initial()
Definition: vw_label.h:75
bool exact_adaptive_norm
Whether exact norm is used for adaptive learning.
virtual float32_t dense_dot_truncated(const float32_t *vec2, VwExample *&ex, float32_t gravity)
static float32_t invsqrt(float32_t x)
x^-0.5, i.e. the inverse square root of a float32_t x
Definition: Math.h:490
virtual CVwEnvironment * get_env()
#define SG_SPRINT(...)
Definition: SGIO.h:179
float32_t power_t
t power value while updating
float32_t weight
Weight of example.
Definition: vw_label.h:94
#define ASSERT(x)
Definition: SGIO.h:200
void push_back(T element)
Definition: DynArray.h:254
VwNonAdaptiveLearner uses a standard gradient descent weight update rule.
float32_t eta_decay_rate
Decay rate of eta per pass.
static void clear_cancel()
Definition: Signal.cpp:126
double float64_t
Definition: common.h:60
float32_t compute_exact_norm(VwExample *&ex, float32_t &sum_abs_x)
DynArray< char * > pairs
Pairs of features to cross for quadratic updates.
vw_size_t num_passes
Number of passes.
float32_t final_prediction
Final prediction.
Definition: vw_example.h:93
Regressor used by VW.
Definition: VwRegressor.h:37
virtual void train(VwExample *&ex, float32_t update)=0
vw_size_t stride
Number of elements in weight vector per feature.
v_array< char > tag
Tag.
Definition: vw_example.h:82
void set_exact_adaptive_norm(bool exact_adaptive)
Example class for VW.
Definition: vw_example.h:58
virtual float32_t predict_and_finalize(VwExample *ex)
float32_t example_t
t value for this example
Definition: vw_example.h:101
static bool cancel_computations()
Definition: Signal.h:111
This class implements streaming features for use with VW.
void set_regressor_out(char *file_name, bool is_text=true)
float float32_t
Definition: common.h:59
float32_t initial
Initial approximation.
Definition: vw_label.h:96
float32_t global_weight
Global weight.
Definition: vw_example.h:99
One feature in VW.
Definition: vw_example.h:34
float32_t x
Feature value.
Definition: vw_example.h:38
#define SG_UNREF(x)
Definition: SGObject.h:53
virtual bool train_machine(CFeatures *feat=NULL)
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CStreamingVwFeatures * features
Features.
Definition: VowpalWabbit.h:279
The class Features is the base class of all feature objects.
Definition: Features.h:68
VwLabel * ld
Label object.
Definition: vw_example.h:79
float32_t eta_round
Learning rate for this round.
Definition: vw_example.h:97
void add_quadratic_pair(char *pair)
#define SG_SERROR(...)
Definition: SGIO.h:178
Class CVowpalWabbit is the implementation of the online learning algorithm used in Vowpal Wabbit...
Definition: VowpalWabbit.h:40
vw_size_t thread_mask
Mask used by regressor for learning.
bool adaptive
Whether adaptive learning is used.
float32_t one_pf_quad_predict(float32_t *weights, VwFeature &f, v_array< VwFeature > &cross_features, vw_size_t mask)
Definition: vw_math.cpp:40
virtual float32_t dense_dot(VwExample *&ex, const float32_t *vec2)
vw_size_t passes_complete
Number of passes complete.
virtual void dump_regressor(char *reg_name, bool as_text)
Definition: VwRegressor.cpp:93
float64_t get_update(float64_t prediction, float64_t label, float64_t eta_t, float64_t norm)
Definition: VwRegressor.h:80
CVwLearner * learner
Learner to use.
Definition: VowpalWabbit.h:285
virtual float64_t get_square_grad(float64_t prediction, float64_t label)=0
float64_t sum_loss
Sum of losses.
v_array< VwFeature > atomics[256]
Array of features.
Definition: vw_example.h:86

SHOGUN Machine Learning Toolbox - Documentation