SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
StreamingDenseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Shashwat Lal Das
8  * Written (W) 2012 Heiko Strathmann
9  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
10  */
11 
15 
16 namespace shogun
17 {
/*
 * Body that runs the argument-less init() and then passes false to
 * parser.set_free_vector_after_release().
 * NOTE(review): the declarator lines of this definition are absent from this
 * listing (it looks like the no-argument constructor) -- confirm against the
 * real source file.
 */
18 template<class T>
21 {
23  init();
24  parser.set_free_vector_after_release(false);
25 }
26 
/*
 * Body that forwards file, is_labelled and size to init(file, is_labelled,
 * size) and then passes false to parser.set_free_vector_after_release().
 * NOTE(review): the first signature line and the initializer list are absent
 * from this listing (presumably the constructor taking a streaming file) --
 * confirm against the real source file.
 */
27 template<class T>
29  bool is_labelled, int32_t size) :
31 {
32  init(file, is_labelled, size);
34  parser.set_free_vector_after_release(false);
35 }
36 
/*
 * Builds a streaming source on top of an existing CDenseFeatures object.
 *
 * dense_features: features to stream from; must not be NULL (REQUIRE).
 * lab:            optional label array; the stream is treated as labelled
 *                 exactly when this pointer is non-NULL.
 *
 * A CStreamingFileFromDenseFeatures<T> is created around the given features,
 * handed to init() with a parser size of 1024, both parser free options are
 * switched off, and the object is marked seekable.
 *
 * NOTE(review): the first signature line, the base-class initializer and the
 * declaration of `file` are absent from this listing -- confirm against the
 * real source file.
 */
38  CDenseFeatures<T>* dense_features, float64_t* lab) :
40 {
/* the message contains a %s conversion, so the class name has to be passed */
41  REQUIRE(dense_features, "%s::CStreamingDenseFeatures(): Features needed!\n", get_name())
42 
44  bool is_labelled;
45  int32_t size=1024;
46 
/* a non-NULL label pointer converts to true */
47  is_labelled=lab;
48  file=new CStreamingFileFromDenseFeatures<T>(dense_features, lab);
49  init(file, is_labelled, size);
51  parser.set_free_vector_after_release(false);
52  parser.set_free_vectors_on_destruct(false);
/* init() resets seekable to false, so it is enabled only after that call */
53  seekable=true;
54 }
55 
/*
 * Clears current_vector (pointer to NULL, length to 0) between two debug
 * messages.
 * NOTE(review): the signature line is absent from this listing; the debug
 * messages indicate this is the destructor -- confirm against the real
 * source file.
 */
57 {
58  SG_DEBUG("entering %s::~CStreamingDenseFeatures()\n", get_name())
59  /* needed to prevent double free memory errors */
60  current_vector.vector=NULL;
61  current_vector.vlen=0;
62  SG_DEBUG("leaving %s::~CStreamingDenseFeatures()\n", get_name())
63 }
64 
/*
 * Restarts streaming from the beginning when the object is seekable; does
 * nothing otherwise.
 * NOTE(review): the signature line is absent from this listing (presumably
 * reset_stream()) -- confirm against the real source file.
 */
66 {
67  if (seekable)
68  {
/* working_file is cast without a type check; seekable is only set to true by
 * the constructor that creates a CStreamingFileFromDenseFeatures */
69  ((CStreamingFileFromDenseFeatures<T>*)working_file)->reset_stream();
/* only end_parser() depends on is_running(); there are no braces, so
 * exit_parser() and everything below run unconditionally */
70  if (parser.is_running())
71  parser.end_parser();
72  parser.exit_parser();
/* NOTE(review): the parser is re-initialised with size 1, while the
 * dense-features constructor uses 1024 -- confirm this is intended */
73  parser.init(working_file, has_labels, 1);
74  parser.set_free_vector_after_release(false);
75  parser.set_free_vectors_on_destruct(false);
76  parser.start_parser();
77  }
78 }
79 
/*
 * Dot product of the current vector with the dense array vec2.
 * ASSERTs that vec2_len equals current_vector.vlen, then accumulates
 * current_vector[i]*vec2[i] in a float32_t and returns the sum.
 * NOTE(review): the first signature line is absent from this listing
 * (presumably the float32_t dense_dot()) -- confirm against the real source.
 */
81  const float32_t* vec2, int32_t vec2_len)
82 {
83  ASSERT(vec2_len==current_vector.vlen)
84  float32_t result=0;
85 
86  for (int32_t i=0; i<current_vector.vlen; i++)
87  result+=current_vector[i]*vec2[i];
88 
89  return result;
90 }
91 
/*
 * Dot product of the current vector with the dense array vec2.
 * ASSERTs that vec2_len equals current_vector.vlen, then accumulates
 * current_vector[i]*vec2[i] in a float64_t and returns the sum.
 * NOTE(review): the first signature line is absent from this listing
 * (presumably the float64_t variant of dense_dot()) -- confirm against the
 * real source.
 */
93  const float64_t* vec2, int32_t vec2_len)
94 {
95  ASSERT(vec2_len==current_vector.vlen)
96  float64_t result=0;
97 
98  for (int32_t i=0; i<current_vector.vlen; i++)
99  result+=current_vector[i]*vec2[i];
100 
101  return result;
102 }
103 
/*
 * Adds alpha times the current vector onto vec2 in place; when abs_val is
 * true, alpha times the absolute value of each element is added instead.
 * ASSERTs that vec2_len equals current_vector.vlen.
 * NOTE(review): the first signature line is absent from this listing
 * (presumably the float32_t add_to_dense_vec()) -- confirm against the real
 * source.
 */
105  float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)
106 {
107  ASSERT(vec2_len==current_vector.vlen)
108 
109  if (abs_val)
110  {
111  for (int32_t i=0; i<current_vector.vlen; i++)
112  vec2[i]+=alpha*CMath::abs(current_vector[i]);
113  }
114  else
115  {
116  for (int32_t i=0; i<current_vector.vlen; i++)
117  vec2[i]+=alpha*current_vector[i];
118  }
119 }
120 
/*
 * Adds alpha times the current vector onto vec2 in place; when abs_val is
 * true, alpha times the absolute value of each element is added instead.
 * ASSERTs that vec2_len equals current_vector.vlen.
 * NOTE(review): the first signature line is absent from this listing
 * (presumably the float64_t variant of add_to_dense_vec()) -- confirm against
 * the real source.
 */
122  float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val)
123 {
124  ASSERT(vec2_len==current_vector.vlen)
125 
126  if (abs_val)
127  {
128  for (int32_t i=0; i<current_vector.vlen; i++)
129  vec2[i]+=alpha*CMath::abs(current_vector[i]);
130  }
131  else
132  {
133  for (int32_t i=0; i<current_vector.vlen; i++)
134  vec2[i]+=alpha*current_vector[i];
135  }
136 }
137 
/*
 * Returns the length of the current vector (current_vector.vlen).
 * NOTE(review): the signature line is absent from this listing, so the
 * function name cannot be seen here -- confirm against the real source file.
 */
139 {
140  return current_vector.vlen;
141 }
142 
143 template<class T> int32_t CStreamingDenseFeatures<T>::get_num_vectors() const
144 {
145  return 1;
146 }
147 
/*
 * Registers CStreamingFile::get_vector as the parser's function for reading
 * a vector.
 * NOTE(review): the signature line is absent from this listing (presumably
 * set_vector_reader()) -- confirm against the real source file.
 */
148 template<class T>
150 {
151  parser.set_read_vector(&CStreamingFile::get_vector);
152 }
153 
/*
 * Registers CStreamingFile::get_vector_and_label as the parser's function
 * for reading a vector together with its label.
 * NOTE(review): the signature line is absent from this listing (presumably
 * set_vector_and_label_reader()) -- confirm against the real source file.
 */
154 template<class T>
156 {
157  parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
158 }
159 
/*
 * GET_FEATURE_TYPE(f_type, sg_type) emits an explicit specialization of
 * CStreamingDenseFeatures<sg_type>::get_feature_type() that returns f_type.
 * The macro is undefined again after the list of invocations below.
 * Both uint8_t and int8_t are mapped to F_BYTE.
 * NOTE(review): some invocation lines are absent from this listing; no
 * specialization is visible here for bool, char, float32_t or float64_t,
 * which are explicitly instantiated at the end of the file -- confirm
 * against the real source file.
 */
160 #define GET_FEATURE_TYPE(f_type, sg_type) \
161 template<> EFeatureType CStreamingDenseFeatures<sg_type>::get_feature_type() const \
162 { \
163  return f_type; \
164 }
165 
168 GET_FEATURE_TYPE(F_BYTE, uint8_t)
169 GET_FEATURE_TYPE(F_BYTE, int8_t)
170 GET_FEATURE_TYPE(F_SHORT, int16_t)
171 GET_FEATURE_TYPE(F_WORD, uint16_t)
172 GET_FEATURE_TYPE(F_INT, int32_t)
173 GET_FEATURE_TYPE(F_UINT, uint32_t)
174 GET_FEATURE_TYPE(F_LONG, int64_t)
175 GET_FEATURE_TYPE(F_ULONG, uint64_t)
179 #undef GET_FEATURE_TYPE
180 
181 template<class T>
182 void CStreamingDenseFeatures<T>::init()
183 {
184  working_file=NULL;
185  seekable=false;
186 
187  /* needed to prevent double free memory errors */
188  current_vector.vector=NULL;
189  current_vector.vlen=-1;
190 
191  set_generic<T>();
192 }
193 
194 template<class T>
195 void CStreamingDenseFeatures<T>::init(CStreamingFile* file, bool is_labelled,
196  int32_t size)
197 {
198  init();
199  has_labels=is_labelled;
200  working_file=file;
201  SG_REF(working_file);
202  parser.init(file, is_labelled, size);
203  seekable=false;
204 }
205 
/*
 * Starts the parser unless parser.is_running() reports it is already
 * running.
 * NOTE(review): the signature line is absent from this listing (presumably
 * start_parser()) -- confirm against the real source file.
 */
206 template<class T>
208 {
209  if (!parser.is_running())
210  parser.start_parser();
211 }
212 
/*
 * Forwards to parser.end_parser().
 * NOTE(review): the signature line is absent from this listing (presumably
 * end_parser()) -- confirm against the real source file.
 */
213 template<class T>
215 {
216  parser.end_parser();
217 }
218 
/*
 * Asks the parser for the next example, passing current_vector.vector,
 * current_vector.vlen and current_label to it, and returns the parser's
 * result converted to bool.
 * NOTE(review): the parser presumably fills the three passed members through
 * reference parameters -- verify against CInputParser::get_next_example().
 * NOTE(review): the signature line is absent from this listing (presumably
 * get_next_example()) -- confirm against the real source file.
 */
219 template<class T>
221 {
222  SG_DEBUG("entering\n");
223  bool ret_value;
224  ret_value=(bool)parser.get_next_example(current_vector.vector,
225  current_vector.vlen, current_label);
226 
227  SG_DEBUG("leaving\n");
228  return ret_value;
229 }
230 
/*
 * Returns current_vector.
 * NOTE(review): the signature line is absent from this listing (presumably
 * get_vector()) -- confirm against the real source file.
 */
231 template<class T>
233 {
234  return current_vector;
235 }
236 
/*
 * Returns current_label; ASSERTs first that has_labels is set.
 * NOTE(review): the signature line is absent from this listing (presumably
 * get_label()) -- confirm against the real source file.
 */
237 template<class T>
239 {
240  ASSERT(has_labels)
241 
242  return current_label;
243 }
244 
/*
 * Forwards to parser.finalize_example().
 * NOTE(review): the signature line is absent from this listing (presumably
 * release_example(), which get_streamed_features() calls after copying a
 * vector) -- confirm against the real source file.
 */
245 template<class T>
247 {
248  parser.finalize_example();
249 }
250 
/*
 * Returns the length of the current vector (current_vector.vlen).
 * NOTE(review): the signature line is absent from this listing (presumably
 * get_dim_feature_space()) -- confirm against the real source file.
 */
251 template<class T>
253 {
254  return current_vector.vlen;
255 }
256 
/*
 * Dot product between the current vector of this object and the current
 * vector of another feature object.
 * ASSERTs that df is non-NULL and that its feature type and feature class
 * match those of this object.
 * NOTE(review): the signature line and the declaration of `sf` are absent
 * from this listing (`sf` is presumably df cast to this class) -- confirm
 * against the real source file.
 */
257 template<class T>
259 {
260  ASSERT(df)
261  ASSERT(df->get_feature_type() == get_feature_type())
262  ASSERT(df->get_feature_class() == get_feature_class())
264 
265  SGVector<T> other_vector=sf->get_vector();
266 
/* NOTE(review): only current_vector.vlen is used as the length; no visible
 * line compares it with other_vector.vlen, whereas the SGVector overload of
 * dot() in this file reports an error on a length mismatch */
267  return CMath::dot(current_vector.vector, other_vector.vector, current_vector.vlen);
268 }
269 
/*
 * Dot product between the current vector and the vector sgvec1.
 * Calls SG_ERROR when the two lengths differ, then returns
 * CMath::dot() over len1 elements.
 * NOTE(review): the signature line is absent from this listing (presumably
 * the dot() overload taking an SGVector<T>) -- confirm against the real
 * source file.
 */
270 template<class T>
272 {
273  int32_t len1;
274  len1=sgvec1.vlen;
275 
276  if (len1!=current_vector.vlen)
277  SG_ERROR(
278  "Lengths %d and %d not equal while computing dot product!\n", len1, current_vector.vlen);
279 
280  return CMath::dot(current_vector.vector, sgvec1.vector, len1);
281 }
282 
/*
 * Returns the length of the current vector (current_vector.vlen).
 * NOTE(review): the signature line is absent from this listing (presumably
 * get_num_features()) -- confirm against the real source file.
 */
283 template<class T>
285 {
286  return current_vector.vlen;
287 }
288 
/*
 * Returns the feature class constant C_STREAMING_DENSE.
 * NOTE(review): the signature line is absent from this listing (presumably
 * get_feature_class()) -- confirm against the real source file.
 */
289 template<class T>
291 {
292  return C_STREAMING_DENSE;
293 }
294 
/*
 * Reads up to num_elements examples from the stream and returns them as a
 * new CDenseFeatures<T> object.
 *
 * num_elements must be positive (REQUIRE). The matrix is allocated when the
 * first example arrives, with current_vector.vlen rows and num_elements
 * columns; each further vector must have the same length (REQUIRE). When
 * the stream ends early, a warning is issued and the matrix is shrunk to
 * the vectors read so far.
 *
 * NOTE(review): the first signature line is absent from this listing
 * (presumably get_streamed_features()) -- confirm against the real source.
 */
295 template<class T>
297  index_t num_elements)
298 {
299  SG_DEBUG("entering\n");
300  SG_DEBUG("Streaming %d elements\n", num_elements)
301 
302  REQUIRE(num_elements>0, "Requested number of feature vectors (%d) must be "
303  "positive\n", num_elements);
304 
305  /* init matrix empty, as we don't know the dimension yet */
306  SGMatrix<T> matrix;
307 
308  for (index_t i=0; i<num_elements; ++i)
309  {
310  /* check if we run out of data */
311  if (!get_next_example())
312  {
313  SG_WARNING("Ran out of streaming data, reallocating matrix and "
314  "returning!\n");
315 
316  /* allocating space for data so far, note this might be 0 bytes */
317  SGMatrix<T> so_far(matrix.num_rows, i);
318 
319  /* copy */
/* NOTE(review): if the very first get_next_example() call fails, matrix has
 * never been allocated when this copy runs, and the size argument is
 * presumably 0 because so_far has i == 0 columns -- confirm sg_memcpy
 * accepts that */
320  sg_memcpy(so_far.matrix, matrix.matrix,
321  so_far.num_rows*so_far.num_cols*sizeof(T));
322 
323  matrix=so_far;
324  break;
325  }
326  else
327  {
328  /* allocate matrix memory in first iteration */
329  if (!matrix.matrix)
330  {
331  SG_DEBUG("Allocating %dx%d matrix\n",
332  current_vector.vlen, num_elements);
333  matrix=SGMatrix<T>(current_vector.vlen, num_elements);
334  }
335 
336  /* get an example from stream and copy to feature matrix */
337  SGVector<T> vec=get_vector();
338 
339  /* check for inconsistent dimensions */
340  REQUIRE(vec.vlen==matrix.num_rows,
341  "Dimension of streamed vector (%d) does not match "
342  "dimensions of previous vectors (%d)\n",
343  vec.vlen, matrix.num_rows);
344 
345  /* copy vector into matrix */
/* the i-th vector is written at element offset current_vector.vlen*i */
346  sg_memcpy(&matrix.matrix[current_vector.vlen*i], vec.vector,
347  vec.vlen*sizeof(T));
348 
349  /* clean up */
350  release_example();
351  }
352 
353  }
354 
355  /* create new feature object from collected data */
356  CDenseFeatures<T>* result=new CDenseFeatures<T>(matrix);
357 
358  SG_DEBUG("leaving returning %dx%d matrix\n", matrix.num_rows,
359  matrix.num_cols);
360 
361  return result;
362 }
363 
/*
 * Explicit instantiations of CStreamingDenseFeatures for each element type
 * listed below.
 */
364 template class CStreamingDenseFeatures<bool> ;
365 template class CStreamingDenseFeatures<char> ;
366 template class CStreamingDenseFeatures<int8_t> ;
367 template class CStreamingDenseFeatures<uint8_t> ;
368 template class CStreamingDenseFeatures<int16_t> ;
369 template class CStreamingDenseFeatures<uint16_t> ;
370 template class CStreamingDenseFeatures<int32_t> ;
371 template class CStreamingDenseFeatures<uint32_t> ;
372 template class CStreamingDenseFeatures<int64_t> ;
373 template class CStreamingDenseFeatures<uint64_t> ;
374 template class CStreamingDenseFeatures<float32_t> ;
375 template class CStreamingDenseFeatures<float64_t> ;
377 }
virtual int32_t get_dim_feature_space() const
virtual CFeatures * get_streamed_features(index_t num_elements)
Class CStreamingFileFromDenseFeatures is a derived class of CStreamingFile which creates an input sou...
This class implements streaming features with dense feature vectors.
int32_t index_t
Definition: common.h:72
virtual void add_to_dense_vec(float32_t alpha, float32_t *vec2, int32_t vec2_len, bool abs_val=false)
virtual void get_vector(bool *&vector, int32_t &len)
#define SG_ERROR(...)
Definition: SGIO.h:128
#define REQUIRE(x,...)
Definition: SGIO.h:205
#define GET_FEATURE_TYPE(f_type, sg_type)
index_t num_cols
Definition: SGMatrix.h:465
virtual float32_t dense_dot(const float32_t *vec2, int32_t vec2_len)
virtual EFeatureClass get_feature_class() const
#define SG_REF(x)
Definition: SGObject.h:52
index_t num_rows
Definition: SGMatrix.h:463
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
shogun matrix
A Streaming File access class.
Definition: StreamingFile.h:34
index_t vlen
Definition: SGVector.h:545
#define ASSERT(x)
Definition: SGIO.h:200
CInputParser< T > parser
The parser object, which reads from input and returns parsed example objects.
shogun vector
double float64_t
Definition: common.h:60
long double floatmax_t
Definition: common.h:61
virtual float32_t dot(SGVector< T > vec)
virtual EFeatureClass get_feature_class() const =0
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:622
Streaming features that support dot products among other operations.
float float32_t
Definition: common.h:59
virtual void get_vector_and_label(bool *&vector, int32_t &len, float64_t &label)
#define SG_DEBUG(...)
Definition: SGIO.h:106
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
bool seekable
Whether the stream is seekable.
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual int32_t get_num_vectors() const
#define SG_WARNING(...)
Definition: SGIO.h:127
virtual EFeatureType get_feature_type() const =0
static T abs(T a)
Definition: Math.h:175

SHOGUN Machine Learning Toolbox - Documentation