SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
Distance.cpp
Go to the documentation of this file.
1 /*
2  * this program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2006-2009 Christian Gehl
8  * Written (W) 2006-2009 Soeren Sonnenburg
9  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
13 #include <shogun/lib/common.h>
14 #include <shogun/io/SGIO.h>
15 #include <shogun/io/File.h>
16 #include <shogun/lib/Time.h>
17 #include <shogun/lib/Signal.h>
18 #include <shogun/base/Parallel.h>
19 #include <shogun/base/Parameter.h>
20 
23 
24 #include <string.h>
25 #include <unistd.h>
26 
27 #ifdef HAVE_PTHREAD
28 #include <pthread.h>
29 #endif
30 
31 using namespace shogun;
32 
34 template <class T> struct D_THREAD_PARAM
35 {
39  int32_t start;
41  int32_t end;
43  int32_t total_start;
45  int32_t total_end;
47  int32_t m;
49  int32_t n;
51  T* result;
53  bool symmetric;
55  bool verbose;
56 };
57 
59 {
60  init();
61 }
62 
63 
65 {
66  init();
67  init(p_lhs, p_rhs);
68 }
69 
71 {
72  SG_FREE(precomputed_matrix);
73  precomputed_matrix=NULL;
74 
76 }
77 
78 bool CDistance::init(CFeatures* l, CFeatures* r)
79 {
80  REQUIRE(check_compatibility(l, r), "Features are not compatible!\n");
81 
82  //remove references to previous features
84 
85  //increase reference counts
86  SG_REF(l);
87  SG_REF(r);
88 
89  lhs=l;
90  rhs=r;
91 
94 
95  SG_FREE(precomputed_matrix);
96  precomputed_matrix=NULL ;
97 
98  return true;
99 }
100 
102 {
103  REQUIRE(l, "Left hand side features must be set!\n");
104  REQUIRE(r, "Right hand side features must be set!\n");
105 
107  "Right hand side of features (%s) must be of same type with left hand side features (%s)\n",
108  r->get_name(), l->get_name());
109 
110  if (l->support_compatible_class())
111  {
113  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
114  r->get_name(), l->get_name());
115  }
116  else if (r->support_compatible_class())
117  {
119  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
120  r->get_name(), l->get_name());
121  }
122  else
123  {
125  "Right hand side of features (%s) must be compatible with left hand side features (%s)\n",
126  r->get_name(), l->get_name());
127  }
128 
129  return true;
130 }
131 
132 void CDistance::load(CFile* loader)
133 {
136 }
137 
138 void CDistance::save(CFile* writer)
139 {
142 }
143 
145 {
146  SG_UNREF(rhs);
147  rhs = NULL;
148  num_rhs=0;
149 
150  SG_UNREF(lhs);
151  lhs = NULL;
152  num_lhs=0;
153 }
154 
156 {
157  SG_UNREF(lhs);
158  lhs = NULL;
159  num_lhs=0;
160 }
161 
164 {
165  SG_UNREF(rhs);
166  rhs = NULL;
167  num_rhs=0;
168 }
169 
171 {
172  //make sure features are compatible
173  REQUIRE(check_compatibility(lhs, r), "Features are not compatible!\n");
174 
175  //remove references to previous rhs features
176  CFeatures* tmp=rhs;
177 
178  rhs=r;
180 
181  SG_FREE(precomputed_matrix);
182  precomputed_matrix=NULL ;
183 
184  // return old features including reference count
185  return tmp;
186 }
187 
189 {
190  //make sure features are compatible
191  REQUIRE(check_compatibility(l, rhs), "Features are not compatible!\n");
192 
193  //remove references to previous rhs features
194  CFeatures* tmp=lhs;
195 
196  lhs=l;
198 
199  SG_FREE(precomputed_matrix);
200  precomputed_matrix=NULL ;
201 
202  // return old features including reference count
203  return tmp;
204 }
205 
206 float64_t CDistance::distance(int32_t idx_a, int32_t idx_b)
207 {
208  REQUIRE(idx_a < lhs->get_num_vectors() && idx_b < rhs->get_num_vectors() && \
209  idx_a >= 0 && idx_b >= 0,
210  "idx_a (%d) must be in [0,%d] and idx_b (%d) must be in [0,%d]\n",
211  idx_a, lhs->get_num_vectors()-1, idx_b, rhs->get_num_vectors()-1)
212 
213  ASSERT(lhs)
214  ASSERT(rhs)
215 
216  if (lhs==rhs)
217  {
218  int32_t num_vectors = lhs->get_num_vectors();
219 
220  if (idx_a>=num_vectors)
221  idx_a=2*num_vectors-1-idx_a;
222 
223  if (idx_b>=num_vectors)
224  idx_b=2*num_vectors-1-idx_b;
225  }
226 
227 
228  if (precompute_matrix && (precomputed_matrix==NULL) && (lhs==rhs))
230 
231  if (precompute_matrix && (precomputed_matrix!=NULL))
232  {
233  if (idx_a>=idx_b)
234  return precomputed_matrix[idx_a*(idx_a+1)/2+idx_b] ;
235  else
236  return precomputed_matrix[idx_b*(idx_b+1)/2+idx_a] ;
237  }
238 
239  return compute(idx_a, idx_b);
240 }
241 
243 {
244  int32_t num_left=lhs->get_num_vectors();
245  int32_t num_right=rhs->get_num_vectors();
246  SG_INFO("precomputing distance matrix (%ix%i)\n", num_left, num_right)
247 
248  ASSERT(num_left==num_right)
249  ASSERT(lhs==rhs)
250  int32_t num=num_left;
251 
252  SG_FREE(precomputed_matrix);
253  precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2);
254 
255  for (int32_t i=0; i<num; i++)
256  {
257  SG_PROGRESS(i*i,0,num*num)
258  for (int32_t j=0; j<=i; j++)
259  precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
260  }
261 
262  SG_PROGRESS(num*num,0,num*num)
263  SG_DONE()
264 }
265 
266 void CDistance::init()
267 {
268  precomputed_matrix = NULL;
269  precompute_matrix = false;
270  lhs = NULL;
271  rhs = NULL;
272  num_lhs=0;
273  num_rhs=0;
274 
275  m_parameters->add((CSGObject**) &lhs, "lhs",
276  "Feature vectors to occur on left hand side.");
277  m_parameters->add((CSGObject**) &rhs, "rhs",
278  "Feature vectors to occur on right hand side.");
279 }
280 
281 template <class T> void* CDistance::get_distance_matrix_helper(void* p)
282 {
283  D_THREAD_PARAM<T>* params= (D_THREAD_PARAM<T>*) p;
284  int32_t i_start=params->start;
285  int32_t i_end=params->end;
286  CDistance* k=params->distance;
287  T* result=params->result;
288  bool symmetric=params->symmetric;
289  int32_t n=params->n;
290  int32_t m=params->m;
291  bool verbose=params->verbose;
292  int64_t total_start=params->total_start;
293  int64_t total_end=params->total_end;
294  int64_t total=total_start;
295 
296  for (int32_t i=i_start; i<i_end; i++)
297  {
298  int32_t j_start=0;
299 
300  if (symmetric)
301  j_start=i;
302 
303  for (int32_t j=j_start; j<n; j++)
304  {
305  float64_t v=k->distance(i,j);
306  result[i+j*m]=v;
307 
308  if (symmetric && i!=j)
309  result[j+i*m]=v;
310 
311  if (verbose)
312  {
313  total++;
314 
315  if (symmetric && i!=j)
316  total++;
317 
318  if (total%100 == 0)
319  SG_OBJ_PROGRESS(k, total, total_start, total_end)
320 
322  break;
323  }
324  }
325 
326  }
327 
328  return NULL;
329 }
330 
331 template <class T>
333 {
334  T* result = NULL;
335 
336  REQUIRE(has_features(), "no features assigned to distance\n")
337 
338  int32_t m=get_num_vec_lhs();
339  int32_t n=get_num_vec_rhs();
340 
341  int64_t total_num = int64_t(m)*n;
342 
343  // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
344  bool symmetric= (lhs && lhs==rhs && m==n);
345 
346  SG_DEBUG("returning distance matrix of size %dx%d\n", m, n)
347 
348  result=SG_MALLOC(T, total_num);
349 
350  int32_t num_threads=parallel->get_num_threads();
351  if (num_threads < 2)
352  {
353  D_THREAD_PARAM<T> params;
354  params.distance=this;
355  params.result=result;
356  params.start=0;
357  params.end=m;
358  params.total_start=0;
359  params.total_end=total_num;
360  params.n=n;
361  params.m=m;
362  params.symmetric=symmetric;
363  params.verbose=true;
364  get_distance_matrix_helper<T>((void*) &params);
365  }
366  else
367  {
368  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
369  D_THREAD_PARAM<T>* params = SG_MALLOC(D_THREAD_PARAM<T>, num_threads);
370  int64_t step= total_num/num_threads;
371 
372  int32_t t;
373 
374  num_threads--;
375  for (t=0; t<num_threads; t++)
376  {
377  params[t].distance = this;
378  params[t].result = result;
379  params[t].start = compute_row_start(t*step, n, symmetric);
380  params[t].end = compute_row_start((t+1)*step, n, symmetric);
381  params[t].total_start=t*step;
382  params[t].total_end=(t+1)*step;
383  params[t].n=n;
384  params[t].m=m;
385  params[t].symmetric=symmetric;
386  params[t].verbose=false;
387 
388  int code=pthread_create(&threads[t], NULL,
389  CDistance::get_distance_matrix_helper<T>, (void*)&params[t]);
390 
391  if (code != 0)
392  {
393  SG_WARNING("Thread creation failed (thread %d of %d) "
394  "with error:'%s'\n",t, num_threads, strerror(code));
395  num_threads=t;
396  break;
397  }
398  }
399 
400  params[t].distance = this;
401  params[t].result = result;
402  params[t].start = compute_row_start(t*step, n, symmetric);
403  params[t].end = m;
404  params[t].total_start=t*step;
405  params[t].total_end=total_num;
406  params[t].n=n;
407  params[t].m=m;
408  params[t].symmetric=symmetric;
409  params[t].verbose=true;
410  get_distance_matrix_helper<T>(&params[t]);
411 
412  for (t=0; t<num_threads; t++)
413  {
414  if (pthread_join(threads[t], NULL) != 0)
415  SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads)
416  }
417 
418  SG_FREE(params);
419  SG_FREE(threads);
420  }
421 
422  SG_DONE()
423 
424  return SGMatrix<T>(result,m,n,true);
425 }
426 
427 template SGMatrix<float64_t> CDistance::get_distance_matrix<float64_t>();
428 template SGMatrix<float32_t> CDistance::get_distance_matrix<float32_t>();
429 
430 template void* CDistance::get_distance_matrix_helper<float64_t>(void* p);
431 template void* CDistance::get_distance_matrix_helper<float32_t>(void* p);
int32_t end
Definition: Distance.cpp:41
virtual const char * get_name() const =0
virtual bool support_compatible_class() const
Definition: Features.h:323
#define SG_INFO(...)
Definition: SGIO.h:118
#define SG_RESET_LOCALE
Definition: SGIO.h:86
#define SG_DONE()
Definition: SGIO.h:157
void do_precompute_matrix()
matrix precomputation
Definition: Distance.cpp:242
int32_t start
Definition: Distance.cpp:39
virtual bool has_features()
Definition: Distance.h:330
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:87
virtual bool get_feature_class_compatibility(EFeatureClass rhs) const
Definition: Features.cpp:355
int32_t get_num_threads() const
Definition: Parallel.cpp:78
#define SG_PROGRESS(...)
Definition: SGIO.h:142
virtual int32_t get_num_vec_lhs()
Definition: Distance.h:312
virtual CFeatures * replace_lhs(CFeatures *lhs)
Definition: Distance.cpp:188
virtual void remove_lhs()
takes all necessary steps if the lhs is removed from distance matrix
Definition: Distance.cpp:155
int32_t num_rhs
Definition: Distance.h:388
virtual int32_t get_num_vectors() const =0
virtual ~CDistance()
Definition: Distance.cpp:70
#define REQUIRE(x,...)
Definition: SGIO.h:206
Parameter * m_parameters
Definition: SGObject.h:546
Parallel * parallel
Definition: SGObject.h:540
int32_t total_end
Definition: Distance.cpp:45
#define SG_REF(x)
Definition: SGObject.h:54
#define SG_SET_LOCALE_C
Definition: SGIO.h:85
virtual bool check_compatibility(CFeatures *l, CFeatures *r)
Definition: Distance.cpp:101
shogun matrix
int32_t total_start
Definition: Distance.cpp:43
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:37
CDistance * distance
Definition: Distance.cpp:37
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
#define SG_OBJ_PROGRESS(o,...)
Definition: SGIO.h:147
virtual void remove_lhs_and_rhs()
Definition: Distance.cpp:144
double float64_t
Definition: common.h:50
void save(CFile *writer)
Definition: Distance.cpp:138
A File access base class.
Definition: File.h:34
void load(CFile *loader)
Definition: Distance.cpp:132
virtual EFeatureClass get_feature_class() const =0
virtual int32_t get_num_vec_rhs()
Definition: Distance.h:321
int32_t num_lhs
Definition: Distance.h:386
static bool cancel_computations()
Definition: Signal.h:86
virtual CFeatures * replace_rhs(CFeatures *rhs)
Definition: Distance.cpp:170
float float32_t
Definition: common.h:49
int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
Definition: Distance.h:173
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
Definition: Distance.cpp:206
#define SG_UNREF(x)
Definition: SGObject.h:55
#define SG_DEBUG(...)
Definition: SGIO.h:107
bool precompute_matrix
Definition: Distance.h:378
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CFeatures * lhs
feature vectors to occur on the left hand side
Definition: Distance.h:381
The class Features is the base class of all feature objects.
Definition: Features.h:68
CFeatures * rhs
feature vectors to occur on the right hand side
Definition: Distance.h:383
SGMatrix< float64_t > get_distance_matrix()
Definition: Distance.h:156
#define SG_WARNING(...)
Definition: SGIO.h:128
float32_t * precomputed_matrix
Definition: Distance.h:373
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Distance.cpp:78
virtual void remove_rhs()
takes all necessary steps if the rhs is removed from distance matrix
Definition: Distance.cpp:163
static void * get_distance_matrix_helper(void *p)
Definition: Distance.cpp:281
virtual float64_t compute(int32_t idx_a, int32_t idx_b)=0
virtual EFeatureType get_feature_type() const =0

SHOGUN Machine Learning Toolbox - Documentation