SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
DotFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2009 Soeren Sonnenburg
8  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
12 #include <shogun/io/SGIO.h>
13 #include <shogun/lib/Signal.h>
14 #include <shogun/lib/Time.h>
16 #include <shogun/base/Parallel.h>
17 #include <shogun/base/Parameter.h>
18 
19 #ifdef HAVE_OPENMP
20 #include <omp.h>
21 #endif
22 
23 using namespace shogun;
24 
25 
// Size constructor (its signature, listing line 26, is absent from this listing):
// forwards size to the CFeatures base, sets combined_weight to 1.0 and then
// calls init().
27  :CFeatures(size), combined_weight(1.0)
28 {
29  init();
30 }
31 
32 
// Copy constructor (its signature, listing line 33, is absent from this listing):
// copy-constructs the CFeatures base from orig, takes over orig.combined_weight
// and then calls init().
34  :CFeatures(orig), combined_weight(orig.combined_weight)
35 {
36  init();
37 }
38 
39 
// Loader constructor (its signature, listing line 40, is absent from this
// listing): forwards loader to the CFeatures base and then calls init().
//
// NOTE(review): unlike the initializer lists at listing lines 27 and 34, this
// one does not set combined_weight, and the visible part of init() only
// registers its address. Unless the header gives it a default, the member
// looks uninitialized on this path - confirm.
41  :CFeatures(loader)
42 {
43  init();
44 }
45 
// Convenience wrapper (signature at listing line 46 is absent from this
// listing): unpacks vec2 into its raw pointer and length and returns the
// result of dense_dot() for feature vector vec_idx1.
47 {
48  return dense_dot(vec_idx1, vec2.vector, vec2.vlen);
49 }
50 
// Fills output with alphas[i]*dense_dot(i, vec, dim)+b for a range of feature
// vectors (the alphas factor is skipped when alphas is null). The work is
// split into per-thread chunks inside an OpenMP parallel region.
//
// output   result buffer; rebased by -start below so it is indexed by the
//          absolute vector index i
// start    first vector index; asserted >=0 and <stop
// stop     one past the last vector index; asserted <=get_num_vectors()
// alphas   optional per-vector factors, read at the absolute index i
// vec,dim  dense vector and its length, forwarded unchanged to dense_dot()
// b        constant added to every result
//
// NOTE(review): listing lines 64, 92 and 105 are absent from this listing, so
// the tail of the non-WIN32 loop header and the condition guarding the final
// SG_INFO are not visible here.
51 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
52 {
53  ASSERT(output)
54  // write access is internally between output[start..stop] so the following
55  // line is necessary to write to output[0...(stop-start-1)]
56  output-=start;
57  ASSERT(start>=0)
58  ASSERT(start<stop)
59  ASSERT(stop<=get_num_vectors())
60 
61  int32_t num_vectors=stop-start;
62  ASSERT(num_vectors>0)
63 
65 
 // shared between all threads; written once inside the parallel region
66  int32_t num_threads;
67  int32_t step;
68  #pragma omp parallel shared(num_threads, step)
69  {
70 #ifdef HAVE_OPENMP
 // one thread computes the chunk size; after the decrement num_threads
 // holds the index of the last thread rather than the thread count
71  #pragma omp single
72  {
73  num_threads=omp_get_num_threads();
74  step=num_vectors/num_threads;
75  num_threads--;
76  }
77  int32_t thread_num=omp_get_thread_num();
78 #else
 // without OpenMP: a single chunk handled by "thread" 0
79  num_threads=0;
80  step=num_vectors;
81  int32_t thread_num=0;
82 #endif
 // progress output is switched off; the original condition is kept in the
 // trailing comment
83  bool progress=false; // (thread_num == 0);
84 
 // the last thread runs up to stop so it also takes the division remainder
 //
 // NOTE(review): t_start/t_stop are built from thread_num*step without
 // adding start, so for start>0 the visible loop appears to cover
 // [0, stop) instead of [start, stop). Because output was rebased by
 // -start above, indices below start would write in front of the caller's
 // buffer - confirm and fix against the full source.
85  int32_t t_start=thread_num*step;
86  int32_t t_stop=(thread_num==num_threads) ? stop : (thread_num+1)*step;
87 
88 #ifdef WIN32
89  for (int32_t i=t_start; i<t_stop; i++)
90 #else
91  for (int32_t i=t_start; i<t_stop &&
93 #endif
94  {
95  if (alphas)
96  output[i]=alphas[i]*this->dense_dot(i, vec, dim)+b;
97  else
98  output[i]=this->dense_dot(i, vec, dim)+b;
99  if (progress)
100  this->display_progress(t_start, t_stop, i);
101  }
102  }
103 
104 #ifndef WIN32
106  SG_INFO("prematurely stopped. \n")
107 #endif
108 }
109 
// Same computation as dense_dot_range(), but over an explicit list of vector
// indices: for each position i in [0, num) it stores
// alphas[sub_index[i]]*dense_dot(sub_index[i], vec, dim)+b in output[i]
// (the alphas factor is skipped when alphas is null).
//
// sub_index  array of num feature-vector indices; asserted non-null
// num        number of entries in sub_index
// output     result buffer, indexed by position i (not by sub_index[i]);
//            asserted non-null
// alphas     optional factors, read at sub_index[i]
// vec,dim    dense vector and its length, forwarded unchanged to dense_dot()
// b          constant added to every result
//
// NOTE(review): listing lines 115, 143 and 156 are absent from this listing, so
// the tail of the non-WIN32 loop header and the condition guarding the final
// SG_INFO are not visible here.
110 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
111 {
112  ASSERT(sub_index)
113  ASSERT(output)
114 
116 
 // shared between all threads; written once inside the parallel region
117  int32_t num_threads;
118  int32_t step;
119  #pragma omp parallel shared(num_threads, step)
120  {
121 #ifdef HAVE_OPENMP
 // one thread computes the chunk size; after the decrement num_threads
 // holds the index of the last thread rather than the thread count
122  #pragma omp single
123  {
124  num_threads=omp_get_num_threads();
125  step=num/num_threads;
126  num_threads--;
127  }
128  int32_t thread_num=omp_get_thread_num();
129 #else
 // without OpenMP: a single chunk handled by "thread" 0
130  num_threads=0;
131  step = num;
132  int32_t thread_num=0;
133 #endif
 // progress output is switched off; the original condition is kept in the
 // trailing comment
134  bool progress=false; // (thread_num == 0);
135 
 // the last thread runs up to num so it also takes the division remainder
136  int32_t t_start=thread_num*step;
137  int32_t t_stop=(thread_num==num_threads) ? num : (thread_num+1)*step;
138 
139 #ifdef WIN32
140  for (int32_t i=t_start; i<t_stop; i++)
141 #else
142  for (int32_t i=t_start; i<t_stop &&
144 #endif
145  {
146  if (alphas)
147  output[i]=alphas[sub_index[i]]*this->dense_dot(sub_index[i], vec, dim)+b;
148  else
149  output[i]=this->dense_dot(sub_index[i], vec, dim)+b;
150  if (progress)
151  this->display_progress(t_start, t_stop, i);
152  }
153  }
154 
155 #ifndef WIN32
157  SG_INFO("prematurely stopped. \n")
158 #endif
159 }
160 
// Materialises all feature vectors into one dense matrix (signature at
// listing line 161 is absent from this listing). Asserts that there is at
// least one vector and a positive feature-space dimension, then returns an
// SGMatrix constructed as (dim, num).
162 {
163 
 // running offset into m.matrix; each vector gets its own run of dim entries
164  int64_t offs=0;
165  int32_t num=get_num_vectors();
166  int32_t dim=get_dim_feature_space();
167  ASSERT(num>0)
168  ASSERT(dim>0)
169 
 // start from zeros because add_to_dense_vec() is called with weight 1.0 on
 // top of the existing contents
170  SGMatrix<float64_t> m(dim, num);
171  m.zero();
172 
173  for (int32_t i=0; i<num; i++)
174  {
175  add_to_dense_vec(1.0, i, &(m.matrix[offs]), dim);
176  offs+=dim;
177  }
178 
179  return m;
180 }
181 
// Materialises the single feature vector with index num as a dense SGVector
// of length get_dim_feature_space() (signature at listing line 182 is absent
// from this listing).
183 {
184 
185  int32_t dim=get_dim_feature_space();
 // NOTE(review): the assertion admits num==get_num_vectors(). The loops
 // elsewhere in this file index vectors as 0..get_num_vectors()-1, so the
 // upper bound probably should be '<' - confirm.
186  ASSERT(num>=0 && num<=get_num_vectors())
187  ASSERT(dim>0)
188 
 // zero first, then add vector num with weight 1.0
189  SGVector<float64_t> v(dim);
190  v.zero();
191  add_to_dense_vec(1.0, num, v.vector, dim);
192  return v;
193 }
194 
// Timing helper (signature at listing line 195 is absent from this listing):
// calls add_to_dense_vec() for every feature vector, repeats times, and
// prints the average CPU and wall-clock time per repeat via SG_PRINT.
196 {
197  int32_t num=get_num_vectors();
198  int32_t d=get_dim_feature_space();
 // scratch accumulator of length d; released with SG_FREE at the end
 // NOTE(review): listing line 200 is absent here, so any initialisation of
 // w is not visible - confirm against the full source.
199  float64_t* w= SG_MALLOC(float64_t, d);
201 
202  CTime t;
203  float64_t start_cpu=t.get_runtime();
204  float64_t start_wall=t.get_curtime();
205  for (int32_t r=0; r<repeats; r++)
206  {
207  for (int32_t i=0; i<num; i++)
208  add_to_dense_vec(1.172343*(r+1), i, w, d);
209  }
210 
 // both elapsed times are divided by repeats, i.e. reported per repeat
211  SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n",
212  repeats, num, (t.get_runtime()-start_cpu)/repeats,
213  (t.get_curtime()-start_wall)/repeats);
214 
215  SG_FREE(w);
216 }
217 
// Timing helper (signature at listing line 218 is absent from this listing):
// runs dense_dot_range() over all feature vectors, repeats times, and prints
// the average CPU and wall-clock time per repeat via SG_PRINT. With
// DEBUG_DOTFEATURES defined it also recomputes the values with plain
// dense_dot() calls and displays the difference.
219 {
220  int32_t num=get_num_vectors();
221  int32_t d=get_dim_feature_space();
 // scratch buffers: w is the dense vector, out the results, alphas the
 // per-vector factors; all three are released with SG_FREE at the end
 // NOTE(review): listing line 225 is absent here, so any initialisation of
 // w is not visible - confirm against the full source.
222  float64_t* w= SG_MALLOC(float64_t, d);
223  float64_t* out= SG_MALLOC(float64_t, num);
224  float64_t* alphas= SG_MALLOC(float64_t, num);
226  SGVector<float64_t>::range_fill_vector(alphas, num, 1.2345);
227  //SGVector<float64_t>::fill_vector(w, d, 17.0);
228  //SGVector<float64_t>::fill_vector(alphas, num, 1.2345);
229 
230  CTime t;
231  float64_t start_cpu=t.get_runtime();
232  float64_t start_wall=t.get_curtime();
233 
234  for (int32_t r=0; r<repeats; r++)
235  dense_dot_range(out, 0, num, alphas, w, d, 23);
236 
237 #ifdef DEBUG_DOTFEATURES
 // reference computation with the same alphas and the same offset 23;
 // out2 is reset at the start of every repeat
238  CMath::display_vector(out, 40, "dense_dot_range");
239  float64_t* out2= SG_MALLOC(float64_t, num);
240 
241  for (int32_t r=0; r<repeats; r++)
242  {
243  CMath::fill_vector(out2, num, 0.0);
244  for (int32_t i=0; i<num; i++)
245  out2[i]+=dense_dot(i, w, d)*alphas[i]+23;
246  }
247  CMath::display_vector(out2, 40, "dense_dot");
248  for (int32_t i=0; i<num; i++)
249  out2[i]-=out[i];
250  CMath::display_vector(out2, 40, "diff");
 // NOTE(review): no SG_FREE(out2) is visible in this block - confirm
 // whether the debug buffer is released elsewhere.
251 #endif
 // both elapsed times are divided by repeats, i.e. reported per repeat
252  SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n",
253  repeats, num, (t.get_runtime()-start_cpu)/repeats,
254  (t.get_curtime()-start_wall)/repeats);
255 
256  SG_FREE(alphas);
257  SG_FREE(out);
258  SG_FREE(w);
259 }
260 
// Returns the element-wise mean of all feature vectors (signature at listing
// line 261 and the declaration of mean at listing line 268 are absent from
// this listing). Requires at least one vector and a positive dimension.
262 {
263  int32_t num=get_num_vectors();
264  int32_t dim=get_dim_feature_space();
265  ASSERT(num>0)
266  ASSERT(dim>0)
267 
 // zero the accumulator before summing into it
269  memset(mean.vector, 0, sizeof(float64_t)*dim);
270 
 // sum every vector with weight 1, then divide each component by num
271  for (int i = 0; i < num; i++)
272  add_to_dense_vec(1, i, mean.vector, dim);
273  for (int j = 0; j < dim; j++)
274  mean.vector[j] /= num;
275 
276  return mean;
277 }
278 
// Returns the element-wise mean over the vectors of two feature objects, lhs
// and rhs, pooled together (signature at listing line 279 and listing lines
// 282 and 291 are absent from this listing). Both pointers must be non-null
// and both objects must contain at least one vector.
280 {
281  ASSERT(lhs && rhs)
283 
284  int32_t num_lhs=lhs->get_num_vectors();
285  int32_t num_rhs=rhs->get_num_vectors();
 // the working dimension is taken from lhs only
 // NOTE(review): no check that rhs has the same dimension is visible here;
 // it may be on the absent listing line 282 - confirm.
286  int32_t dim=lhs->get_dim_feature_space();
287  ASSERT(num_lhs>0)
288  ASSERT(num_rhs>0)
289  ASSERT(dim>0)
290 
 // zero the accumulator before summing into it
292  memset(mean.vector, 0, sizeof(float64_t)*dim);
293 
 // sum all vectors of both objects, then divide by the pooled count
294  for (int i = 0; i < num_lhs; i++)
295  lhs->add_to_dense_vec(1, i, mean.vector, dim);
296  for (int i = 0; i < num_rhs; i++)
297  rhs->add_to_dense_vec(1, i, mean.vector, dim);
298  for (int j = 0; j < dim; j++)
299  mean.vector[j] /= (num_lhs+num_rhs);
300 
301  return mean;
302 }
303 
// Returns a dim x dim matrix built from the feature vectors after subtracting
// mean from each of them; the accumulated products are divided by num
// (signature at listing line 304 and the declarations of mean and v at
// listing lines 315 and 319 are absent from this listing).
305 {
306  int32_t num=get_num_vectors();
307  int32_t dim=get_dim_feature_space();
308  ASSERT(num>0)
309  ASSERT(dim>0)
310 
311  SGMatrix<float64_t> cov(dim, dim);
312 
313  memset(cov.matrix, 0, sizeof(float64_t)*dim*dim);
314 
316 
317  for (int i = 0; i < num; i++)
318  {
 // presumably computes v = 1*v + (-1)*mean in place - confirm against
 // SGVector::add
320  SGVector<float64_t>::add(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
 // accumulate v[m]*v[n] for n<=m only; the other half is copied below
 // NOTE(review): the index uses v.vlen as the row stride while cov was
 // sized with dim, so this relies on v.vlen==dim - confirm.
321  for (int m = 0; m < v.vlen; m++)
322  {
323  for (int n = 0; n <= m ; n++)
324  {
325  (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
326  }
327  }
328  }
 // normalise the accumulated half (n<=m) by the number of vectors
329  for (int m = 0; m < dim; m++)
330  {
331  for (int n = 0; n <= m ; n++)
332  {
333  (cov.matrix)[m*dim+n] /= num;
334  }
335  }
 // mirror the computed half into the remaining entries (n>m) so the
 // result is symmetric
336  for (int m = 0; m < dim-1; m++)
337  {
338  for (int n = m+1; n < dim; n++)
339  {
340  (cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m];
341  }
342  }
343  return cov;
344 }
345 
// Two-object variant of the covariance computation: pools the vectors of lhs
// and rhs, subtracts mean from each and accumulates the products into a
// dim x dim matrix that is divided by the pooled vector count (signature at
// listing line 346 and the declarations of mean and v at listing lines 370
// and 376 are absent from this listing). Both objects must be non-empty and
// have the same feature-space dimension.
347 {
 // put both inputs in an array so the same loops can handle either
348  CDotFeatures* feats[2];
349  feats[0]=lhs;
350  feats[1]=rhs;
351 
 // num ends up as the total number of vectors in lhs and rhs
352  int32_t nums[2], dims[2], num=0;
353 
354  for (int i = 0; i < 2; i++)
355  {
356  nums[i]=feats[i]->get_num_vectors();
357  dims[i]=feats[i]->get_dim_feature_space();
358  ASSERT(nums[i]>0)
359  ASSERT(dims[i]>0)
360  num += nums[i];
361  }
362 
363  ASSERT(dims[0]==dims[1])
364  int32_t dim = dims[0];
365 
366  SGMatrix<float64_t> cov(dim, dim);
367 
368  memset(cov.matrix, 0, sizeof(float64_t)*dim*dim);
369 
371 
372  for (int i = 0; i < 2; i++)
373  {
374  for (int j = 0; j < nums[i]; j++)
375  {
 // presumably computes v = 1*v + (-1)*mean in place - confirm against
 // SGVector::add
377  SGVector<float64_t>::add(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
 // accumulate v[m]*v[n] for n<=m only; the other half is copied below
 // NOTE(review): the index uses v.vlen as the row stride while cov was
 // sized with dim, so this relies on v.vlen==dim - confirm.
378  for (int m = 0; m < v.vlen; m++)
379  {
380  for (int n = 0; n <= m; n++)
381  {
382  (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
383  }
384  }
385  }
386  }
 // normalise the accumulated half (n<=m) by the pooled vector count
387  for (int m = 0; m < dim; m++)
388  {
389  for (int n = 0; n <= m; n++)
390  {
391  (cov.matrix)[m*dim+n] /= num;
392  }
393  }
 // mirror the computed half into the remaining entries (n>m) so the
 // result is symmetric
394  for (int m = 0; m < dim-1; m++)
395  {
396  for (int n = m+1; n < dim; n++)
397  {
398  (cov.matrix[m*dim+n]) = (cov.matrix)[n*dim+m];
399  }
400  }
401 
402  return cov;
403 }
404 
// Reports progress for item v of the half-open index range [start, stop).
//
// start  first index of the range being processed
// stop   one past the last index of the range
// v      absolute index of the item that has just been processed
//
// Output is throttled: SG_PROGRESS is only invoked when the zero-based
// offset of v within the range is a multiple of (num_vectors/100+1).
405 void CDotFeatures::display_progress(int32_t start, int32_t stop, int32_t v)
406 {
407  int32_t num_vectors=stop-start;
408  int32_t i=v-start;
409 
 // report the zero-based offset i rather than the absolute index v, so the
 // value stays inside the [0, num_vectors-1] scale handed to SG_PROGRESS
 // even when start>0
410  if ( (i% (num_vectors/100+1))== 0)
411  SG_PROGRESS(i, 0.0, num_vectors-1)
412 }
413 
// Shared set-up step called by every constructor in this file. The visible
// part registers the combined_weight member with m_parameters under the name
// "combined_weight", together with a description string.
//
// NOTE(review): listing line 416 is absent from this listing, so one statement
// of this function is not visible here.
414 void CDotFeatures::init()
415 {
417  m_parameters->add(&combined_weight, "combined_weight",
418  "Feature weighting in combined dot features.");
419 }
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:42
virtual void dense_dot_range(float64_t *output, int32_t start, int32_t stop, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b)
Definition: DotFeatures.cpp:51
#define SG_INFO(...)
Definition: SGIO.h:117
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:264
#define SG_PROGRESS(...)
Definition: SGIO.h:141
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)=0
CDotFeatures(int32_t size=0)
Definition: DotFeatures.cpp:26
static float64_t get_runtime()
Definition: Time.h:106
virtual int32_t get_num_vectors() const =0
void set_property(EFeatureProperty p)
Definition: Features.cpp:300
Parameter * m_parameters
Definition: SGObject.h:567
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)=0
void benchmark_dense_dot_range(int32_t repeats=5)
virtual float64_t dense_dot_sgvec(int32_t vec_idx1, const SGVector< float64_t > vec2)
Definition: DotFeatures.cpp:46
std::enable_if<!std::is_same< T, complex128_t >::value, float64_t >::type mean(const Container< T > &a)
Features that support dot products among other operations.
Definition: DotFeatures.h:44
virtual int32_t get_dim_feature_space() const =0
static float64_t get_curtime()
Definition: Time.h:116
virtual SGVector< float64_t > get_mean()
static SGMatrix< float64_t > compute_cov(CDotFeatures *lhs, CDotFeatures *rhs)
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:38
index_t vlen
Definition: SGVector.h:545
#define SG_PRINT(...)
Definition: SGIO.h:136
#define ASSERT(x)
Definition: SGIO.h:200
static void clear_cancel()
Definition: Signal.cpp:126
virtual void dense_dot_range_subset(int32_t *sub_index, int32_t num, float64_t *output, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b)
double float64_t
Definition: common.h:60
static void range_fill_vector(T *vec, int32_t len, T start=0)
Definition: SGVector.cpp:271
A File access base class.
Definition: File.h:34
void benchmark_add_to_dense_vector(int32_t repeats=5)
static bool cancel_computations()
Definition: Signal.h:111
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
SGVector< float64_t > get_computed_dot_feature_vector(int32_t num)
void display_progress(int32_t start, int32_t stop, int32_t v)
float64_t combined_weight
feature weighting in combined dot features
Definition: DotFeatures.h:245
SGMatrix< float64_t > get_computed_dot_feature_matrix()
void add(const SGVector< T > x)
Definition: SGVector.cpp:310
virtual SGMatrix< float64_t > get_cov()

SHOGUN Machine Learning Toolbox - Documentation