SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
VwParser.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
15 
16 #ifndef _VW_PARSER_H__
17 #define _VW_PARSER_H__
18 
19 #include <shogun/lib/config.h>
20 
21 #include <shogun/base/SGObject.h>
22 #include <shogun/io/SGIO.h>
23 #include <shogun/lib/Hash.h>
26 
27 namespace shogun
28 {
/** The type of input to parse. */
30 enum E_VW_PARSER_TYPE
31 {
32  T_VW = 1,
/* NOTE(review): listing line 33 is absent from this extract; an enumerator
 * with value 2 presumably sits between T_VW and T_DENSE -- confirm against
 * the original header before relying on this listing. */
34  T_DENSE = 3
35 };
36 
48 class CVwParser: public CSGObject
49 {
50 public:
/** Default constructor (declaration only; the body is not part of this header listing). */
54  CVwParser();
55 
/** Constructor taking the environment to use (declaration only).
 *
 * @param env_to_use pointer to a CVwEnvironment
 */
61  CVwParser(CVwEnvironment* env_to_use);
62 
/** Virtual destructor (declaration only; the body is not part of this header listing). */
66  virtual ~CVwParser();
67 
/** Get the environment held by the parser.
 *
 * @return the env member, as a CVwEnvironment pointer
 */
73  CVwEnvironment* get_env()
74  {
75  return env;
76  }
77 
/** Replace the environment held by the parser.
 *
 * Does nothing when env_to_use is the pointer already stored in env.
 *
 * @param env_to_use environment to store
 */
83  void set_env(CVwEnvironment* env_to_use)
84  {
85  if(env_to_use != env)
86  {
/* SG_REF is applied to the incoming pointer before SG_UNREF is applied
 * to the stored one; only then is the member overwritten. */
87  SG_REF(env_to_use);
88  SG_UNREF(env);
89  env = env_to_use;
90  }
91  }
92 
/** Set the cache parameters by forwarding both arguments to init_cache().
 *
 * @param fname passed through unchanged as init_cache()'s fname argument
 * @param type cache type, C_NATIVE when omitted
 */
99  void set_cache_parameters(char * fname, EVwCacheType type = C_NATIVE)
100  {
101  init_cache(fname, type);
102  }
103 
/** Get the type of cache.
 *
 * @return the cache_type member
 */
109  EVwCacheType get_cache_type()
110  {
111  return cache_type;
112  }
113 
/** Enable or disable cache writing.
 *
 * Stores the flag in write_cache. When enabling, init_cache(NULL) is
 * called; when disabling, SG_UNREF is applied to cache_writer if it is
 * non-null.
 *
 * @param wr_cache whether to write cache or not
 */
119  void set_write_cache(bool wr_cache)
120  {
121  write_cache = wr_cache;
122  if (wr_cache)
123  init_cache(NULL);
124  else
125  if (cache_writer)
/* Listing line 126 was missing from the extract, leaving the `if` above
 * without a statement; restored here. */
126  SG_UNREF(cache_writer);
127  }
128 
/** Report whether the parser is set to write cache.
 *
 * @return the write_cache member
 */
134  bool get_write_cache()
135  {
136  return write_cache;
137  }
138 
/** Fold a label into the environment's running label range.
 *
 * env->min_label always becomes the smaller of its current value and
 * label. env->max_label is raised only when label differs from FLT_MAX.
 *
 * @param label label value to fold in
 */
144  void set_mm(float64_t label)
145  {
146  env->min_label = CMath::min(env->min_label, label);
/* NOTE(review): FLT_MAX presumably acts as a "no label" sentinel that must
 * not inflate max_label -- confirm against the callers. */
147  if (label != FLT_MAX)
148  env->max_label = CMath::max(env->max_label, label);
149  }
150 
/** Counterpart of set_mm() with an empty body: the label is ignored.
 *
 * @param label unused
 */
157  void noop_mm(float64_t label) { }
158 
/** Update the min/max label bookkeeping; delegates directly to set_mm().
 *
 * @param label label value to fold in
 */
165  void set_minmax(float64_t label)
166  {
167  set_mm(label);
168  }
169 
/** Read features from an I/O buffer into an example (declaration only;
 * the body lives in VwParser.cpp).
 *
 * @param buf I/O buffer to read from
 * @param ex example pointer, taken by reference
 * @return an int32_t; NOTE(review): its meaning is not visible in this header -- confirm in VwParser.cpp
 */
178  int32_t read_features(CIOBuffer* buf, VwExample*& ex);
179 
/** SVMLight-named sibling of read_features() with the same parameter
 * types (declaration only; the body lives in VwParser.cpp).
 *
 * @param buf I/O buffer to read from
 * @param ae example pointer, taken by reference
 * @return an int32_t; NOTE(review): its meaning is not visible in this header -- confirm in VwParser.cpp
 */
188  int32_t read_svmlight_features(CIOBuffer* buf, VwExample*& ae);
189 
/** Dense-named sibling of read_features() with the same parameter types
 * (declaration only; the body lives in VwParser.cpp).
 *
 * @param buf I/O buffer to read from
 * @param ae example pointer, taken by reference
 * @return an int32_t; NOTE(review): its meaning is not visible in this header -- confirm in VwParser.cpp
 */
198  int32_t read_dense_features(CIOBuffer* buf, VwExample*& ae);
199 
/** Return the name of the object.
 *
 * @return the string literal "VwParser"
 */
205  virtual const char* get_name() const { return "VwParser"; }
206 
207 protected:
/** Initialise the cache (declaration only; the body lives in VwParser.cpp).
 * Called by set_cache_parameters() and, with a NULL fname, by
 * set_write_cache().
 *
 * @param fname NOTE(review): presumably the cache file name; NULL is passed by set_write_cache() -- confirm
 * @param type cache type, C_NATIVE when omitted
 */
214  void init_cache(char * fname, EVwCacheType type = C_NATIVE);
215 
225 
/** Tokenize a substring (declaration only; the body lives in VwParser.cpp).
 *
 * @param delim NOTE(review): presumably the delimiter character -- confirm in VwParser.cpp
 * @param s substring to process, passed by value
 * @param ret v_array of substrings passed by non-const reference; presumably receives the tokens -- confirm
 */
234  void tokenize(char delim, substring s, v_array<substring> &ret);
235 
/** Find the first occurrence of a character in the range [start, max).
 *
 * The character at max itself is never read.
 *
 * @param start position to begin scanning from
 * @param v character to look for
 * @param max end of the range; scanning stops on reaching it
 * @return pointer to the first character equal to v, or max if none was found
 */
246  inline char* safe_index(char *start, char v, char *max)
247  {
/* The bound is tested before the dereference, so *start is only read
 * while start != max. */
248  while (start != max && *start != v)
249  start++;
250  return start;
251  }
252 
253 public:
/** Hash function to use, of type hash_func_t. */
255  hash_func_t hasher;
256 
257 protected:
/** Environment of VW - used by parser. */
259  CVwEnvironment* env;
/** Object which will be used for writing cache. */
261  CVwCacheWriter* cache_writer;
/** Type of cache. */
263  EVwCacheType cache_type;
/** Whether to write cache or not. */
265  bool write_cache;
266 
267 private:
/* NOTE(review): presumably scratch arrays reused while parsing -- their
 * use is only visible in VwParser.cpp. */
269  v_array<substring> channels;
270  v_array<substring> words;
271  v_array<substring> name;
272 };
273 
274 }
275 #endif // _VW_PARSER_H__
An I/O buffer class.
Definition: IOBuffer.h:41
uint32_t(* hash_func_t)(substring, uint32_t)
Hash function typedef, takes a substring and seed as parameters.
Definition: vw_constants.h:23
void feature_value(substring &s, v_array< substring > &name, float32_t &v)
Definition: VwParser.cpp:277
CVwCacheWriter is the base class for all VW cache creating classes.
Definition: VwCacheWriter.h:35
char * safe_index(char *start, char v, char *max)
Definition: VwParser.h:246
virtual ~CVwParser()
Definition: VwParser.cpp:48
Class CVwEnvironment is the environment used by VW.
Definition: VwEnvironment.h:41
int32_t read_features(CIOBuffer *buf, VwExample *&ex)
Definition: VwParser.cpp:54
CVwEnvironment * env
Environment of VW - used by parser.
Definition: VwParser.h:259
CVwParser is the object which provides the functions to parse examples from buffered input...
Definition: VwParser.h:48
float64_t min_label
Smallest label seen.
Class v_array taken directly from JL's implementation.
EVwCacheType get_cache_type()
Definition: VwParser.h:109
void set_minmax(float64_t label)
Definition: VwParser.h:165
CVwCacheWriter * cache_writer
Object which will be used for writing cache.
Definition: VwParser.h:261
E_VW_PARSER_TYPE
The type of input to parse.
Definition: VwParser.h:30
int32_t read_dense_features(CIOBuffer *buf, VwExample *&ae)
Definition: VwParser.cpp:212
void set_write_cache(bool wr_cache)
Definition: VwParser.h:119
CVwEnvironment * get_env()
Definition: VwParser.h:73
struct Substring, specified by start position and end position.
Definition: SGIO.h:229
float64_t max_label
Largest label seen.
#define SG_REF(x)
Definition: SGObject.h:54
void tokenize(char delim, substring s, v_array< substring > &ret)
Definition: VwParser.cpp:301
bool write_cache
Whether to write cache or not.
Definition: VwParser.h:265
bool get_write_cache()
Definition: VwParser.h:134
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
int32_t read_svmlight_features(CIOBuffer *buf, VwExample *&ae)
Definition: VwParser.cpp:170
double float64_t
Definition: common.h:50
Example class for VW.
Definition: vw_example.h:58
static T max(T a, T b)
Definition: Math.h:168
virtual const char * get_name() const
Definition: VwParser.h:205
EVwCacheType cache_type
Type of cache.
Definition: VwParser.h:263
float float32_t
Definition: common.h:49
#define SG_UNREF(x)
Definition: SGObject.h:55
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
void noop_mm(float64_t label)
Definition: VwParser.h:157
void set_mm(float64_t label)
Definition: VwParser.h:144
void set_env(CVwEnvironment *env_to_use)
Definition: VwParser.h:83
static T min(T a, T b)
Definition: Math.h:157
void init_cache(char *fname, EVwCacheType type=C_NATIVE)
Definition: VwParser.cpp:254
hash_func_t hasher
Hash function to use, of type hash_func_t.
Definition: VwParser.h:255
void set_cache_parameters(char *fname, EVwCacheType type=C_NATIVE)
Definition: VwParser.h:99
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68

SHOGUN Machine Learning Toolbox - Documentation