SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
VwParser.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
15 
16 #ifndef _VW_PARSER_H__
17 #define _VW_PARSER_H__
18 
19 #include <shogun/lib/config.h>
20 
21 #include <shogun/base/SGObject.h>
22 #include <shogun/io/SGIO.h>
23 #include <shogun/lib/Hash.h>
26 
27 namespace shogun
28 {
29 /// The type of input to parse.
30 enum E_VW_PARSER_TYPE
31 {
32  T_VW = 1,
33  T_SVMLIGHT = 2, // NOTE(review): line missing from the listing; name inferred from read_svmlight_features() - confirm
34  T_DENSE = 3
35 };
36 
48 class CVwParser: public CSGObject
49 {
50 public:
54  CVwParser();
55 
61  CVwParser(CVwEnvironment* env_to_use);
62 
66  virtual ~CVwParser();
67 
74  {
75  SG_REF(env);
76  return env;
77  }
78 
84  void set_env(CVwEnvironment* env_to_use)
85  {
86  env = env_to_use;
87  SG_REF(env);
88  }
89 
96  void set_cache_parameters(char * fname, EVwCacheType type = C_NATIVE)
97  {
98  init_cache(fname, type);
99  }
100 
107  {
108  return cache_type;
109  }
110 
116  void set_write_cache(bool wr_cache)
117  {
118  write_cache = wr_cache;
119  if (wr_cache)
120  init_cache(NULL);
121  else
122  if (cache_writer)
124  }
125 
132  {
133  return write_cache;
134  }
135 
141  void set_mm(float64_t label)
142  {
143  env->min_label = CMath::min(env->min_label, label);
144  if (label != FLT_MAX)
145  env->max_label = CMath::max(env->max_label, label);
146  }
147 
154  void noop_mm(float64_t label) { }
155 
162  void set_minmax(float64_t label)
163  {
164  set_mm(label);
165  }
166 
175  int32_t read_features(CIOBuffer* buf, VwExample*& ex);
176 
185  int32_t read_svmlight_features(CIOBuffer* buf, VwExample*& ae);
186 
195  int32_t read_dense_features(CIOBuffer* buf, VwExample*& ae);
196 
202  virtual const char* get_name() const { return "VwParser"; }
203 
204 protected:
211  void init_cache(char * fname, EVwCacheType type = C_NATIVE);
212 
222 
231  void tokenize(char delim, substring s, v_array<substring> &ret);
232 
243  inline char* safe_index(char *start, char v, char *max)
244  {
245  while (start != max && *start != v)
246  start++;
247  return start;
248  }
249 
250 public:
253 
254 protected:
263 
264 private:
266  v_array<substring> channels;
267  v_array<substring> words;
268  v_array<substring> name;
269 };
270 
271 }
272 #endif // _VW_PARSER_H__
An I/O buffer class.
Definition: IOBuffer.h:41
uint32_t(* hash_func_t)(substring, uint32_t)
Hash function typedef, takes a substring and seed as parameters.
Definition: vw_constants.h:23
void feature_value(substring &s, v_array< substring > &name, float32_t &v)
Definition: VwParser.cpp:271
CVwCacheWriter is the base class for all VW cache creating classes.
Definition: VwCacheWriter.h:35
char * safe_index(char *start, char v, char *max)
Definition: VwParser.h:243
virtual ~CVwParser()
Definition: VwParser.cpp:42
Class CVwEnvironment is the environment used by VW.
Definition: VwEnvironment.h:41
int32_t read_features(CIOBuffer *buf, VwExample *&ex)
Definition: VwParser.cpp:48
CVwEnvironment * env
Environment of VW - used by parser.
Definition: VwParser.h:256
CVwParser is the object which provides the functions to parse examples from buffered input...
Definition: VwParser.h:48
float64_t min_label
Smallest label seen.
Class v_array taken directly from JL's implementation.
EVwCacheType get_cache_type()
Definition: VwParser.h:106
void set_minmax(float64_t label)
Definition: VwParser.h:162
CVwCacheWriter * cache_writer
Object which will be used for writing cache.
Definition: VwParser.h:258
E_VW_PARSER_TYPE
The type of input to parse.
Definition: VwParser.h:30
int32_t read_dense_features(CIOBuffer *buf, VwExample *&ae)
Definition: VwParser.cpp:206
void set_write_cache(bool wr_cache)
Definition: VwParser.h:116
CVwEnvironment * get_env()
Definition: VwParser.h:73
struct Substring, specified by start position and end position.
Definition: SGIO.h:229
float64_t max_label
Largest label seen.
#define SG_REF(x)
Definition: SGObject.h:51
void tokenize(char delim, substring s, v_array< substring > &ret)
Definition: VwParser.cpp:295
bool write_cache
Whether to write cache or not.
Definition: VwParser.h:262
bool get_write_cache()
Definition: VwParser.h:131
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
int32_t read_svmlight_features(CIOBuffer *buf, VwExample *&ae)
Definition: VwParser.cpp:164
double float64_t
Definition: common.h:50
Example class for VW.
Definition: vw_example.h:58
static T max(T a, T b)
Definition: Math.h:168
virtual const char * get_name() const
Definition: VwParser.h:202
EVwCacheType cache_type
Type of cache.
Definition: VwParser.h:260
float float32_t
Definition: common.h:49
#define SG_UNREF(x)
Definition: SGObject.h:52
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
void noop_mm(float64_t label)
Definition: VwParser.h:154
void set_mm(float64_t label)
Definition: VwParser.h:141
void set_env(CVwEnvironment *env_to_use)
Definition: VwParser.h:84
static T min(T a, T b)
Definition: Math.h:157
void init_cache(char *fname, EVwCacheType type=C_NATIVE)
Definition: VwParser.cpp:248
hash_func_t hasher
Hash function to use, of type hash_func_t.
Definition: VwParser.h:252
void set_cache_parameters(char *fname, EVwCacheType type=C_NATIVE)
Definition: VwParser.h:96
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68

SHOGUN Machine Learning Toolbox - Documentation