SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
VwParser.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
15 
16 #ifndef _VW_PARSER_H__
17 #define _VW_PARSER_H__
18 
19 #include <shogun/lib/config.h>
20 
21 #include <shogun/base/SGObject.h>
22 #include <shogun/io/SGIO.h>
23 #include <shogun/lib/Hash.h>
26 
27 namespace shogun
28 {
// NOTE(review): the line that declares and names this enum (listing line 30)
// is missing from this extract, and the listing numbering also skips line 33,
// presumably an enumerator with value 2. Restore both from the original
// header before relying on this definition.
31 {
32  T_VW = 1,
34  T_DENSE = 3
35 };
36 
48 class CVwParser: public CSGObject
49 {
50 public:
/** Default constructor (takes no arguments); only declared in this header. */
54  CVwParser();
55 
/**
 * Constructor taking the environment the parser should work with.
 *
 * @param env_to_use environment object handed to the parser
 */
61  CVwParser(CVwEnvironment* env_to_use);
62 
/** Virtual destructor; only declared in this header. */
66  virtual ~CVwParser();
67 
// NOTE(review): the signature line for this body (listing line 73) is missing
// from this extract; judging by the body it is presumably the getter for env.
74  {
// Takes a reference on env via SG_REF before handing the pointer out.
75  SG_REF(env);
76  return env;
77  }
78 
84  void set_env(CVwEnvironment* env_to_use)
85  {
86  env = env_to_use;
87  SG_REF(env);
88  }
89 
96  void set_cache_parameters(char * fname, EVwCacheType type = C_NATIVE)
97  {
98  init_cache(fname, type);
99  }
100 
// NOTE(review): the signature line for this body (listing line 106) is missing
// from this extract; presumably the getter for cache_type.
107  {
// Returns the cache_type member unchanged.
108  return cache_type;
109  }
110 
/**
 * Store wr_cache in write_cache. When wr_cache is true, init_cache(NULL) is
 * called; otherwise, if cache_writer is non-null, the statement noted below
 * is executed.
 *
 * @param wr_cache new value for write_cache
 */
116  void set_write_cache(bool wr_cache)
117  {
118  write_cache = wr_cache;
119  if (wr_cache)
120  init_cache(NULL);
121  else
122  if (cache_writer)
// NOTE(review): listing line 123 -- the statement guarded by this `if` -- is
// missing from this extract; presumably it releases cache_writer. Confirm
// against the original header before compiling.
124  }
125 
// NOTE(review): the signature line for this body (listing line 131) is missing
// from this extract; presumably the getter for write_cache.
132  {
// Returns the write_cache member unchanged.
133  return write_cache;
134  }
135 
141  void set_mm(float64_t label)
142  {
143  env->min_label = CMath::min(env->min_label, label);
144  if (label != FLT_MAX)
145  env->max_label = CMath::max(env->max_label, label);
146  }
147 
154  void noop_mm(float64_t label) { }
155 
162  void set_minmax(float64_t label)
163  {
164  set_mm(label);
165  }
166 
/**
 * Only declared here; the definition is not part of this header.
 * NOTE(review): presumably reads one example from buf into ex -- the meaning
 * of the returned int32_t is not visible here, confirm in the implementation.
 *
 * @param buf input buffer
 * @param ex example pointer, passed by reference
 */
175  int32_t read_features(CIOBuffer* buf, VwExample*& ex);
176 
/**
 * Only declared here; the definition is not part of this header.
 * NOTE(review): by its name, presumably the SVMLight-format counterpart of
 * read_features() -- confirm in the implementation.
 *
 * @param buf input buffer
 * @param ae example pointer, passed by reference
 */
185  int32_t read_svmlight_features(CIOBuffer* buf, VwExample*& ae);
186 
/**
 * Only declared here; the definition is not part of this header.
 * NOTE(review): by its name, presumably the dense-format counterpart of
 * read_features() -- confirm in the implementation.
 *
 * @param buf input buffer
 * @param ae example pointer, passed by reference
 */
195  int32_t read_dense_features(CIOBuffer* buf, VwExample*& ae);
196 
202  virtual const char* get_name() const { return "VwParser"; }
203 
204 protected:
/**
 * Only declared here. Called by set_cache_parameters() and, with a NULL
 * fname, by set_write_cache(true).
 *
 * @param fname file name argument; callers in this header may pass NULL
 * @param type cache type; C_NATIVE when omitted
 */
211  void init_cache(char * fname, EVwCacheType type = C_NATIVE);
212 
// NOTE(review): the listing numbering jumps from 212 to 222 here, so a
// declaration appears to be missing from this extract.
222 
/**
 * Only declared here.
 * NOTE(review): presumably splits s on delim and stores the pieces in ret --
 * confirm in the implementation.
 *
 * @param delim delimiter character
 * @param s substring to process
 * @param ret array receiving the result
 */
231  void tokenize(char delim, substring s, v_array<substring> &ret);
232 
/**
 * Find the first occurrence of a character in the range [start, max).
 *
 * The scan never reads at or beyond max. If start is already at or past max,
 * nothing is read and start is returned unchanged; comparing with `<` rather
 * than `!=` keeps a start beyond max from running off the buffer.
 *
 * @param start first position to examine
 * @param v character to look for
 * @param max one past the last position that may be examined
 * @return pointer to the first occurrence of v, or max if v does not occur
 *         in [start, max); start itself when start >= max
 */
inline char* safe_index(char *start, char v, char *max)
{
	while (start < max && *start != v)
		++start;
	return start;
}
249 
250 public:
// NOTE(review): listing lines 251-252 are missing from this extract, so any
// public data member declared here is not visible.
253 
254 protected:
// NOTE(review): listing lines 255-262 are missing from this extract. The
// members env, cache_writer, cache_type and write_cache used by the inline
// methods above are declared nowhere in the visible text; presumably they
// belong in one of these gaps.
263 
264 private:
// Three arrays of substrings. NOTE(review): presumably parser working
// storage -- nothing in this header uses them, confirm in the implementation.
266  v_array<substring> channels;
267  v_array<substring> words;
268  v_array<substring> name;
269 };
270 
271 }
272 #endif // _VW_PARSER_H__

SHOGUN Machine Learning Toolbox - Documentation