SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
Distance.cpp
Go to the documentation of this file.
1 /*
2  * this program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2006-2009 Christian Gehl
8  * Written (W) 2006-2009 Soeren Sonnenburg
9  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
13 #include <shogun/lib/common.h>
14 #include <shogun/io/SGIO.h>
15 #include <shogun/io/File.h>
16 #include <shogun/lib/Time.h>
17 #include <shogun/lib/Signal.h>
18 #include <shogun/base/Parallel.h>
19 #include <shogun/base/Parameter.h>
20 
23 
24 #include <string.h>
25 #ifndef _WIN32
26 #include <unistd.h>
27 #endif
28 
29 #ifdef HAVE_OPENMP
30 #include <omp.h>
31 #endif
32 
33 using namespace shogun;
34 
36 {
37  init();
38 }
39 
40 
42 {
43  init();
44  init(p_lhs, p_rhs);
45 }
46 
48 {
49  SG_FREE(precomputed_matrix);
50  precomputed_matrix=NULL;
51 
53 }
54 
55 bool CDistance::init(CFeatures* l, CFeatures* r)
56 {
57  REQUIRE(check_compatibility(l, r), "Features are not compatible!\n");
58 
59  //increase reference counts
60  SG_REF(l);
61  SG_REF(r);
62 
63  //remove references to previous features
65 
66  lhs=l;
67  rhs=r;
68 
71 
72  SG_FREE(precomputed_matrix);
73  precomputed_matrix=NULL ;
74 
75  return true;
76 }
77 
79 {
80  REQUIRE(l, "Left hand side features must be set!\n");
81  REQUIRE(r, "Right hand side features must be set!\n");
82 
84  "Right hand side of features (%s) must be of same type with left hand side features (%s)\n",
85  r->get_name(), l->get_name());
86 
87  if (l->support_compatible_class())
88  {
90  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
91  r->get_name(), l->get_name());
92  }
93  else if (r->support_compatible_class())
94  {
96  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
97  r->get_name(), l->get_name());
98  }
99  else
100  {
102  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
103  r->get_name(), l->get_name());
104  }
105 
106  return true;
107 }
108 
109 void CDistance::load(CFile* loader)
110 {
113 }
114 
115 void CDistance::save(CFile* writer)
116 {
119 }
120 
122 {
123  SG_UNREF(rhs);
124  rhs = NULL;
125  num_rhs=0;
126 
127  SG_UNREF(lhs);
128  lhs = NULL;
129  num_lhs=0;
130 }
131 
133 {
134  SG_UNREF(lhs);
135  lhs = NULL;
136  num_lhs=0;
137 }
138 
141 {
142  SG_UNREF(rhs);
143  rhs = NULL;
144  num_rhs=0;
145 }
146 
148 {
149  //make sure features are compatible
150  REQUIRE(check_compatibility(lhs, r), "Features are not compatible!\n");
151 
152  //remove references to previous rhs features
153  CFeatures* tmp=rhs;
154 
155  rhs=r;
157 
158  SG_FREE(precomputed_matrix);
159  precomputed_matrix=NULL ;
160 
161  // return old features including reference count
162  return tmp;
163 }
164 
166 {
167  //make sure features are compatible
168  REQUIRE(check_compatibility(l, rhs), "Features are not compatible!\n");
169 
170  //remove references to previous rhs features
171  CFeatures* tmp=lhs;
172 
173  lhs=l;
175 
176  SG_FREE(precomputed_matrix);
177  precomputed_matrix=NULL ;
178 
179  // return old features including reference count
180  return tmp;
181 }
182 
183 float64_t CDistance::distance(int32_t idx_a, int32_t idx_b)
184 {
185  REQUIRE(idx_a < lhs->get_num_vectors() && idx_b < rhs->get_num_vectors() && \
186  idx_a >= 0 && idx_b >= 0,
187  "idx_a (%d) must be in [0,%d] and idx_b (%d) must be in [0,%d]\n",
188  idx_a, lhs->get_num_vectors()-1, idx_b, rhs->get_num_vectors()-1)
189 
190  ASSERT(lhs)
191  ASSERT(rhs)
192 
193  if (lhs==rhs)
194  {
195  int32_t num_vectors = lhs->get_num_vectors();
196 
197  if (idx_a>=num_vectors)
198  idx_a=2*num_vectors-1-idx_a;
199 
200  if (idx_b>=num_vectors)
201  idx_b=2*num_vectors-1-idx_b;
202  }
203 
204 
205  if (precompute_matrix && (precomputed_matrix==NULL) && (lhs==rhs))
207 
208  if (precompute_matrix && (precomputed_matrix!=NULL))
209  {
210  if (idx_a>=idx_b)
211  return precomputed_matrix[idx_a*(idx_a+1)/2+idx_b] ;
212  else
213  return precomputed_matrix[idx_b*(idx_b+1)/2+idx_a] ;
214  }
215 
216  return compute(idx_a, idx_b);
217 }
218 
219 void CDistance::run_distance_rhs(SGVector<float64_t>& result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_a)
220 {
221  for(index_t i=idx_r_start; idx_start < idx_stop; ++i,++idx_start)
222  result.vector[i] = this->distance(idx_a,idx_start);
223 }
224 
225 void CDistance::run_distance_lhs(SGVector<float64_t>& result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_b)
226 {
227  for(index_t i=idx_r_start; idx_start < idx_stop; ++i,++idx_start)
228  result.vector[i] = this->distance(idx_start,idx_b);
229 }
230 
232 {
233  int32_t num_left=lhs->get_num_vectors();
234  int32_t num_right=rhs->get_num_vectors();
235  SG_INFO("precomputing distance matrix (%ix%i)\n", num_left, num_right)
236 
237  ASSERT(num_left==num_right)
238  ASSERT(lhs==rhs)
239  int32_t num=num_left;
240 
241  SG_FREE(precomputed_matrix);
242  precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2);
243 
244  for (int32_t i=0; i<num; i++)
245  {
246  SG_PROGRESS(i*i,0,num*num)
247  for (int32_t j=0; j<=i; j++)
248  precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
249  }
250 
251  SG_PROGRESS(num*num,0,num*num)
252  SG_DONE()
253 }
254 
255 void CDistance::init()
256 {
257  precomputed_matrix = NULL;
258  precompute_matrix = false;
259  lhs = NULL;
260  rhs = NULL;
261  num_lhs=0;
262  num_rhs=0;
263 
264  m_parameters->add((CSGObject**) &lhs, "lhs",
265  "Feature vectors to occur on left hand side.");
266  m_parameters->add((CSGObject**) &rhs, "rhs",
267  "Feature vectors to occur on right hand side.");
268 }
269 
270 template <class T>
272 {
273  T* result = NULL;
274 
275  REQUIRE(has_features(), "no features assigned to distance\n")
276 
277  int32_t m=get_num_vec_lhs();
278  int32_t n=get_num_vec_rhs();
279 
280  int64_t total_num = int64_t(m)*n;
281  int64_t total=0;
282  int64_t total_start=0;
283  int64_t total_end=total_num;
284 
285  // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
286  bool symmetric= (lhs && lhs==rhs && m==n);
287 
288  SG_DEBUG("returning distance matrix of size %dx%d\n", m, n)
289 
290  result=SG_MALLOC(T, total_num);
291 
292  int32_t num_threads;
293  int64_t step;
294  #pragma omp parallel shared(num_threads, step)
295  {
296 #ifdef HAVE_OPENMP
297  #pragma omp single
298  {
299  num_threads=omp_get_num_threads();
300  step=total_num/num_threads;
301  num_threads--;
302  }
303  int32_t thread_num=omp_get_thread_num();
304 #else
305  num_threads=0;
306  step=total_num;
307  int32_t thread_num=0;
308 #endif
309  bool verbose=(thread_num == 0);
310 
311  int32_t start=compute_row_start(thread_num*step, n, symmetric);
312  int32_t end=(thread_num==num_threads) ? m : compute_row_start((thread_num+1)*step, n, symmetric);
313 
314  for (int32_t i=start; i<end; i++)
315  {
316  int32_t j_start=0;
317 
318  if (symmetric)
319  j_start=i;
320 
321  for (int32_t j=j_start; j<n; j++)
322  {
323  float64_t v=this->distance(i,j);
324  result[i+j*m]=v;
325 
326  if (symmetric && i!=j)
327  result[j+i*m]=v;
328 
329  if (verbose)
330  {
331  total++;
332 
333  if (symmetric && i!=j)
334  total++;
335 
336  if (total%100 == 0)
337  SG_OBJ_PROGRESS(this, total, total_start, total_end)
338 
340  break;
341  }
342  }
343  }
344  }
345 
346  SG_DONE()
347 
348  return SGMatrix<T>(result,m,n,true);
349 }
350 
351 template SGMatrix<float64_t> CDistance::get_distance_matrix<float64_t>();
352 template SGMatrix<float32_t> CDistance::get_distance_matrix<float32_t>();
virtual const char * get_name() const =0
virtual bool support_compatible_class() const
Definition: Features.h:323
#define SG_INFO(...)
Definition: SGIO.h:117
#define SG_RESET_LOCALE
Definition: SGIO.h:85
#define SG_DONE()
Definition: SGIO.h:156
void do_precompute_matrix()
matrix precomputation
Definition: Distance.cpp:231
virtual bool has_features()
Definition: Distance.h:324
virtual bool get_feature_class_compatibility(EFeatureClass rhs) const
Definition: Features.cpp:355
int32_t index_t
Definition: common.h:72
#define SG_PROGRESS(...)
Definition: SGIO.h:141
virtual int32_t get_num_vec_lhs()
Definition: Distance.h:306
virtual CFeatures * replace_lhs(CFeatures *lhs)
Definition: Distance.cpp:165
virtual void remove_lhs()
takes all necessary steps if the lhs is removed from distance matrix
Definition: Distance.cpp:132
int32_t num_rhs
Definition: Distance.h:404
virtual int32_t get_num_vectors() const =0
virtual ~CDistance()
Definition: Distance.cpp:47
#define REQUIRE(x,...)
Definition: SGIO.h:205
Parameter * m_parameters
Definition: SGObject.h:567
void run_distance_rhs(SGVector< float64_t > &result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_a)
Definition: Distance.cpp:219
#define SG_REF(x)
Definition: SGObject.h:52
#define SG_SET_LOCALE_C
Definition: SGIO.h:84
virtual bool check_compatibility(CFeatures *l, CFeatures *r)
Definition: Distance.cpp:78
shogun matrix
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:38
#define ASSERT(x)
Definition: SGIO.h:200
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:125
#define SG_OBJ_PROGRESS(o,...)
Definition: SGIO.h:146
virtual void remove_lhs_and_rhs()
Definition: Distance.cpp:121
double float64_t
Definition: common.h:60
void save(CFile *writer)
Definition: Distance.cpp:115
A File access base class.
Definition: File.h:34
void load(CFile *loader)
Definition: Distance.cpp:109
virtual EFeatureClass get_feature_class() const =0
void run_distance_lhs(SGVector< float64_t > &result, const index_t idx_r_start, index_t idx_start, const index_t idx_stop, const index_t idx_b)
Definition: Distance.cpp:225
virtual int32_t get_num_vec_rhs()
Definition: Distance.h:315
int32_t num_lhs
Definition: Distance.h:402
static bool cancel_computations()
Definition: Signal.h:111
virtual CFeatures * replace_rhs(CFeatures *rhs)
Definition: Distance.cpp:147
float float32_t
Definition: common.h:59
int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
Definition: Distance.h:173
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
Definition: Distance.cpp:183
#define SG_UNREF(x)
Definition: SGObject.h:53
#define SG_DEBUG(...)
Definition: SGIO.h:106
bool precompute_matrix
Definition: Distance.h:394
All classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CFeatures * lhs
feature vectors to occur on the left hand side
Definition: Distance.h:397
The class Features is the base class of all feature objects.
Definition: Features.h:68
CFeatures * rhs
feature vectors to occur on the right hand side
Definition: Distance.h:399
SGMatrix< float64_t > get_distance_matrix()
Definition: Distance.h:156
float32_t * precomputed_matrix
Definition: Distance.h:389
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Distance.cpp:55
virtual void remove_rhs()
takes all necessary steps if the rhs is removed from distance matrix
Definition: Distance.cpp:140
virtual float64_t compute(int32_t idx_a, int32_t idx_b)=0
virtual EFeatureType get_feature_type() const =0

SHOGUN Machine Learning Toolbox - Documentation