SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LibSVMFile.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2014 Jiaolong Xu
8  * Written (W) 2013 Evgeniy Andreev (gsomix)
9  * Written (W) 2010 Soeren Sonnenburg
10  */
11 
12 #ifndef __LIBSVMFILE_H__
13 #define __LIBSVMFILE_H__
14 
15 #include <shogun/lib/config.h>
16 #include <shogun/io/File.h>
17 
18 namespace shogun
19 {
20 
/* Forward declarations.
 *
 * Within this header CDelimiterTokenizer, CLineReader and CParser appear only
 * as pointer members of CLibSVMFile, and SGSparseVector appears only behind
 * pointers in function parameters, so declarations are sufficient here.
 * SGString is declared but not referenced in the visible part of this header.
 *
 * NOTE(review): SGVector is used further down in this header without a
 * forward declaration here; presumably it is made visible through
 * <shogun/io/File.h> - confirm.
 */
21 class CDelimiterTokenizer;
22 class CLineReader;
23 class CParser;
24 template <class ST> class SGString;
25 template <class T> class SGSparseVector;
26 
/** File class derived from CFile; get_name() reports it as "LibSVMFile".
 *
 * The class declares, for thirteen element types (bool, uint8_t, int8_t,
 * char, int32_t, uint32_t, int64_t, uint64_t, int16_t, uint16_t, float32_t,
 * float64_t, floatmax_t), overloads of
 *  - get_sparse_matrix: all data arguments are non-const references and the
 *    return type is void, so results can only be reported through them;
 *  - set_sparse_matrix: the matrix (and labels) are const pointers and the
 *    sizes are passed by value, i.e. they are inputs.
 * Each of the two comes in three flavours: matrix only, matrix plus one
 * float64_t label array, and matrix plus SGVector<float64_t> "multilabel".
 *
 * Only declarations are visible in this header; the implementation is
 * elsewhere.
 *
 * NOTE(review): presumably this reads/writes the LibSVM / SVMlight text
 * format ("label index:value ...") - confirm against the implementation.
 */
34 class CLibSVMFile : public CFile
35 {
36 public:
    /** default constructor */
38  CLibSVMFile();
39 
    /** constructor from a C FILE pointer
     *
     * @param f FILE pointer (presumably an already opened stream - confirm)
     * @param name optional name, defaults to NULL; its meaning is not
     *        visible in this header
     */
45  CLibSVMFile(FILE* f, const char* name=NULL);
46 
    /** constructor from a file name
     *
     * @param fname file name (presumably the path to open - confirm)
     * @param rw single mode character, defaults to 'r' (presumably
     *        'r' = read, 'w' = write - confirm in CFile)
     * @param name optional name, defaults to NULL; its meaning is not
     *        visible in this header
     */
53  CLibSVMFile(const char* fname, char rw='r', const char* name=NULL);
54 
    /** virtual destructor */
56  virtual ~CLibSVMFile();
57 
    /* Everything up to the matching #endif is hidden when SWIG is defined. */
58 #ifndef SWIG // SWIG should skip this part
59 
    /** get_sparse_matrix without labels, one virtual overload per element
     * type.
     *
     * @param matrix reference to a pointer to SGSparseVector<T>; written
     *        through by the callee (ownership of the pointed-to storage is
     *        not visible here - confirm before freeing)
     * @param num_feat reference receiving a feature count (presumably the
     *        dimensionality of the vectors - confirm)
     * @param num_vec reference receiving a vector count (presumably the
     *        number of entries in matrix - confirm)
     */
67  virtual void get_sparse_matrix(
68  SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
69  virtual void get_sparse_matrix(
70  SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
71  virtual void get_sparse_matrix(
72  SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
73  virtual void get_sparse_matrix(
74  SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
75  virtual void get_sparse_matrix(
76  SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
77  virtual void get_sparse_matrix(
78  SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
79  virtual void get_sparse_matrix(
80  SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
81  virtual void get_sparse_matrix(
82  SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
83  virtual void get_sparse_matrix(
84  SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
85  virtual void get_sparse_matrix(
86  SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
87  virtual void get_sparse_matrix(
88  SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
89  virtual void get_sparse_matrix(
90  SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
91  virtual void get_sparse_matrix(
92  SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
94 
    /** get_sparse_matrix with a single label array, one virtual overload
     * per element type.
     *
     * @param matrix reference to a pointer to SGSparseVector<T>
     * @param num_feat reference receiving a feature count
     * @param num_vec reference receiving a vector count
     * @param labels reference to a float64_t pointer (presumably one label
     *        per vector - confirm)
     * @param load_labels defaults to true (presumably false skips filling
     *        labels - confirm in the implementation)
     *
     * NOTE(review): these are virtual functions with a default argument;
     * in C++ the default is chosen from the static type of the call
     * expression, so overriders should repeat the same default.
     */
103  virtual void get_sparse_matrix(
104  SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec,
105  float64_t*& labels, bool load_labels=true);
106  virtual void get_sparse_matrix(
107  SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
108  float64_t*& labels, bool load_labels=true);
109  virtual void get_sparse_matrix(
110  SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
111  float64_t*& labels, bool load_labels=true);
112  virtual void get_sparse_matrix(
113  SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec,
114  float64_t*& labels, bool load_labels=true);
115  virtual void get_sparse_matrix(
116  SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
117  float64_t*& labels, bool load_labels=true);
118  virtual void get_sparse_matrix(
119  SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
120  float64_t*& labels, bool load_labels=true);
121  virtual void get_sparse_matrix(
122  SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
123  float64_t*& labels, bool load_labels=true);
124  virtual void get_sparse_matrix(
125  SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
126  float64_t*& labels, bool load_labels=true);
127  virtual void get_sparse_matrix(
128  SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
129  float64_t*& labels, bool load_labels=true);
130  virtual void get_sparse_matrix(
131  SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
132  float64_t*& labels, bool load_labels=true);
133  virtual void get_sparse_matrix(
134  SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
135  float64_t*& labels, bool load_labels=true);
136  virtual void get_sparse_matrix(
137  SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
138  float64_t*& labels, bool load_labels=true);
139  virtual void get_sparse_matrix(
140  SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
141  float64_t*& labels, bool load_labels=true);
143 
    /** get_sparse_matrix with multiple labels, one overload per element
     * type. Unlike the overloads above, these are not declared virtual in
     * this class.
     *
     * @param matrix_feat reference to a pointer to SGSparseVector<T>
     * @param num_feat reference receiving a feature count
     * @param num_vec reference receiving a vector count
     * @param multilabel reference to a pointer to SGVector<float64_t>
     *        (presumably one label vector per example - confirm)
     * @param num_classes reference receiving a class count (how it is
     *        derived is not visible here)
     * @param load_labels defaults to true (presumably false skips filling
     *        multilabel - confirm in the implementation)
     */
152  void get_sparse_matrix(
153  SGSparseVector<bool>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
154  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
155  void get_sparse_matrix(
156  SGSparseVector<uint8_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
157  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
158  void get_sparse_matrix(
159  SGSparseVector<int8_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
160  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
161  void get_sparse_matrix(
162  SGSparseVector<char>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
163  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
164  void get_sparse_matrix(
165  SGSparseVector<int32_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
166  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
167  void get_sparse_matrix(
168  SGSparseVector<uint32_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
169  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
170  void get_sparse_matrix(
171  SGSparseVector<int64_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
172  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
173  void get_sparse_matrix(
174  SGSparseVector<uint64_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
175  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
176  void get_sparse_matrix(
177  SGSparseVector<int16_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
178  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
179  void get_sparse_matrix(
180  SGSparseVector<uint16_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
181  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
182  void get_sparse_matrix(
183  SGSparseVector<float32_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
184  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
185  void get_sparse_matrix(
186  SGSparseVector<float64_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
187  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
188  void get_sparse_matrix(
189  SGSparseVector<floatmax_t>*& matrix_feat, int32_t & num_feat, int32_t & num_vec,
190  SGVector<float64_t>*& multilabel, int32_t & num_classes, bool load_labels=true);
192 
    /** set_sparse_matrix without labels, one virtual overload per element
     * type.
     *
     * @param matrix pointer to const SGSparseVector<T> entries (input)
     * @param num_feat feature count, passed by value
     * @param num_vec vector count, passed by value (presumably the number
     *        of entries in matrix - confirm)
     */
200  virtual void set_sparse_matrix(
201  const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
202  virtual void set_sparse_matrix(
203  const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
204  virtual void set_sparse_matrix(
205  const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
206  virtual void set_sparse_matrix(
207  const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
208  virtual void set_sparse_matrix(
209  const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
210  virtual void set_sparse_matrix(
211  const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
212  virtual void set_sparse_matrix(
213  const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
214  virtual void set_sparse_matrix(
215  const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
216  virtual void set_sparse_matrix(
217  const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
218  virtual void set_sparse_matrix(
219  const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
220  virtual void set_sparse_matrix(
221  const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
222  virtual void set_sparse_matrix(
223  const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
224  virtual void set_sparse_matrix(
225  const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
227 
    /** set_sparse_matrix with a single label array, one virtual overload
     * per element type.
     *
     * @param matrix pointer to const SGSparseVector<T> entries (input)
     * @param num_feat feature count, passed by value
     * @param num_vec vector count, passed by value
     * @param labels pointer to const float64_t (presumably num_vec labels;
     *        whether NULL is accepted is not visible here - confirm)
     */
235  virtual void set_sparse_matrix(
236  const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec,
237  const float64_t* labels);
238  virtual void set_sparse_matrix(
239  const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec,
240  const float64_t* labels);
241  virtual void set_sparse_matrix(
242  const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec,
243  const float64_t* labels);
244  virtual void set_sparse_matrix(
245  const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec,
246  const float64_t* labels);
247  virtual void set_sparse_matrix(
248  const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec,
249  const float64_t* labels);
250  virtual void set_sparse_matrix(
251  const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec,
252  const float64_t* labels);
253  virtual void set_sparse_matrix(
254  const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec,
255  const float64_t* labels);
256  virtual void set_sparse_matrix(
257  const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec,
258  const float64_t* labels);
259  virtual void set_sparse_matrix(
260  const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec,
261  const float64_t* labels);
262  virtual void set_sparse_matrix(
263  const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec,
264  const float64_t* labels);
265  virtual void set_sparse_matrix(
266  const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec,
267  const float64_t* labels);
268  virtual void set_sparse_matrix(
269  const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec,
270  const float64_t* labels);
271  virtual void set_sparse_matrix(
272  const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec,
273  const float64_t* labels);
275 
    /** set_sparse_matrix with multiple labels, one overload per element
     * type. Unlike the set_sparse_matrix overloads above, these are not
     * declared virtual in this class.
     *
     * @param matrix pointer to const SGSparseVector<T> entries (input)
     * @param num_feat feature count, passed by value
     * @param num_vec vector count, passed by value
     * @param multilabel pointer to const SGVector<float64_t> (presumably
     *        one label vector per example - confirm)
     */
283  void set_sparse_matrix(
284  const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec,
285  const SGVector<float64_t>* multilabel);
286  void set_sparse_matrix(
287  const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec,
288  const SGVector<float64_t>* multilabel);
289  void set_sparse_matrix(
290  const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec,
291  const SGVector<float64_t>* multilabel);
292  void set_sparse_matrix(
293  const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec,
294  const SGVector<float64_t>* multilabel);
295  void set_sparse_matrix(
296  const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec,
297  const SGVector<float64_t>* multilabel);
298  void set_sparse_matrix(
299  const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec,
300  const SGVector<float64_t>* multilabel);
301  void set_sparse_matrix(
302  const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec,
303  const SGVector<float64_t>* multilabel);
304  void set_sparse_matrix(
305  const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec,
306  const SGVector<float64_t>* multilabel);
307  void set_sparse_matrix(
308  const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec,
309  const SGVector<float64_t>* multilabel);
310  void set_sparse_matrix(
311  const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec,
312  const SGVector<float64_t>* multilabel);
313  void set_sparse_matrix(
314  const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec,
315  const SGVector<float64_t>* multilabel);
316  void set_sparse_matrix(
317  const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec,
318  const SGVector<float64_t>* multilabel);
319  void set_sparse_matrix(
320  const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec,
321  const SGVector<float64_t>* multilabel);
323 
324 #endif // #ifndef SWIG // SWIG should skip this part
325 
    /** @return the constant string "LibSVMFile" */
326  virtual const char* get_name() const { return "LibSVMFile"; }
327 
328 private:
    /** initialisation helper; what it sets up is not visible in this header
     * (presumably the members declared below - confirm)
     */
330  void init();
331 
    /** initialisation helper; presumably calls init() and then applies
     * default delimiters/tokenizers - confirm in the implementation
     */
333  void init_with_defaults();
334 
    /** @return a line count as int32_t (presumably of the underlying file;
     *          whether the read position is restored afterwards is not
     *          visible here - confirm)
     */
336  int32_t get_num_lines();
337 
    /** predicate on a character vector
     *
     * @param entry SGVector<char>, taken by const value
     * @return bool (presumably true when entry is a feature entry rather
     *         than a label - confirm in the implementation)
     */
339  bool is_feat_entry(const SGVector<char> entry);
    /* Second private: specifier; redundant after the one above, it only
     * separates member functions from data members. */
340 private:
    /** delimiter character for feature entries (presumably the separator
     * between index and value - confirm)
     */
342  char m_delimiter_feat;
343 
    /** delimiter character for labels (presumably the separator between
     * multiple labels - confirm)
     */
345  char m_delimiter_label;
346 
    /** pointer to a CLineReader; ownership/lifetime handling is not visible
     * in this header
     */
348  CLineReader* m_line_reader;
349 
    /** pointer to a CParser; ownership/lifetime handling is not visible in
     * this header
     */
351  CParser* m_parser;
352 
    /** tokenizer pointer (presumably splits the input into lines - confirm) */
354  CDelimiterTokenizer* m_line_tokenizer;
355 
    /** tokenizer pointer (presumably splits a line on whitespace - confirm) */
357  CDelimiterTokenizer* m_whitespace_tokenizer;
358 
    /** tokenizer pointer (presumably splits on m_delimiter_feat - confirm) */
360  CDelimiterTokenizer* m_delimiter_feat_tokenizer;
361 
    /** tokenizer pointer (presumably splits on m_delimiter_label - confirm) */
363  CDelimiterTokenizer* m_delimiter_label_tokenizer;
364  };
365 
366 }
367 
368 #endif

SHOGUN Machine Learning Toolbox - Documentation