SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
DenseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Written (W) 2011-2013 Heiko Strathmann
10  * Written (W) 2014-2017 Soumyajit De
11  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
12  * Copyright (C) 2010 Berlin Institute of Technology
13  */
14 
15 #include <shogun/base/some.h>
18 #include <shogun/io/SGIO.h>
19 #include <shogun/base/Parameter.h>
22 #include <algorithm>
23 #include <string.h>
24 
25 namespace shogun {
26 
27 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
28 {
29  init();
30 }
31 
32 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
33  CDotFeatures(orig)
34 {
35  init();
38 
39  if (orig.m_subset_stack != NULL)
40  {
44  }
45 }
46 
48  CDotFeatures()
49 {
50  init();
51  set_feature_matrix(matrix);
52 }
53 
54 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
55  CDotFeatures()
56 {
57  init();
58  set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
59 }
60 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
61  CDotFeatures()
62 {
63  init();
64  load(loader);
65 }
66 
67 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
68 {
69  return new CDenseFeatures<ST>(*this);
70 }
71 
73 {
74  free_features();
75 }
76 
77 template<class ST> void CDenseFeatures<ST>::free_features()
78 {
79  m_subset_stack->remove_all_subsets();
80  free_feature_matrix();
81  SG_UNREF(feature_cache);
82 }
83 
84 template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
85 {
86  m_subset_stack->remove_all_subsets();
87  feature_matrix=SGMatrix<ST>();
88  num_vectors = 0;
89  num_features = 0;
90 }
91 
92 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
93 {
94  /* index conversion for subset, only for array access */
95  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
96 
97  len = num_features;
98 
99  if (feature_matrix.matrix)
100  {
101  dofree = false;
102  return &feature_matrix.matrix[real_num * int64_t(num_features)];
103  }
104 
105  ST* feat = NULL;
106  dofree = false;
107 
108  if (feature_cache)
109  {
110  feat = feature_cache->lock_entry(real_num);
111 
112  if (feat)
113  return feat;
114  else
115  feat = feature_cache->set_entry(real_num);
116  }
117 
118  if (!feat)
119  dofree = true;
120  feat = compute_feature_vector(num, len, feat);
121 
122  if (get_num_preprocessors())
123  {
124  int32_t tmp_len = len;
125  ST* tmp_feat_before = feat;
126  ST* tmp_feat_after = NULL;
127 
128  for (int32_t i = 0; i < get_num_preprocessors(); i++)
129  {
131  (CDensePreprocessor<ST>*) get_preprocessor(i);
132  // temporary hack
134  SGVector<ST>(tmp_feat_before, tmp_len));
135  tmp_feat_after = applied.vector;
136  SG_UNREF(p);
137 
138  if (i != 0) // delete feature vector, except for the the first one, i.e., feat
139  SG_FREE(tmp_feat_before);
140  tmp_feat_before = tmp_feat_after;
141  }
142 
143  // note: tmp_feat_after should be checked as it is used by memcpy
144  if (tmp_feat_after)
145  {
146  sg_memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
147  SG_FREE(tmp_feat_after);
148 
149  len = tmp_len;
150  }
151  }
152  return feat;
153 }
154 
155 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
156 {
157  /* index conversion for subset, only for array access */
158  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
159 
160  if (num>=get_num_vectors())
161  {
162  SG_ERROR("Index out of bounds (number of vectors %d, you "
163  "requested %d)\n", get_num_vectors(), num);
164  }
165 
166  if (!feature_matrix.matrix)
167  SG_ERROR("Requires a in-memory feature matrix\n")
168 
169  if (vector.vlen != num_features)
170  SG_ERROR(
171  "Vector not of length %d (has %d)\n", num_features, vector.vlen);
172 
173  sg_memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
174  int64_t(num_features) * sizeof(ST));
175 }
176 
178 {
179  /* index conversion for subset, only for array access */
180  int32_t real_num=m_subset_stack->subset_idx_conversion(num);
181 
182  if (num >= get_num_vectors())
183  {
184  SG_ERROR("Index out of bounds (number of vectors %d, you "
185  "requested %d)\n", get_num_vectors(), real_num);
186  }
187 
188  int32_t vlen;
189  bool do_free;
190  ST* vector= get_feature_vector(num, vlen, do_free);
191  return SGVector<ST>(vector, vlen, do_free);
192 }
193 
194 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
195 {
196  if (feature_cache)
197  feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num));
198 
199  if (dofree)
200  SG_FREE(feat_vec);
201 }
202 
203 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
204 {
205  free_feature_vector(vec.vector, num, false);
206  vec=SGVector<ST>();
207 }
208 
209 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
210 {
211  if (m_subset_stack->has_subsets())
212  SG_ERROR("A subset is set, cannot call vector_subset\n")
213 
214  ASSERT(feature_matrix.matrix)
215  ASSERT(idx_len<=num_vectors)
216 
217  int32_t num_vec = num_vectors;
218  num_vectors = idx_len;
219 
220  int32_t old_ii = -1;
221 
222  for (int32_t i = 0; i < idx_len; i++)
223  {
224  int32_t ii = idx[i];
225  ASSERT(old_ii<ii)
226 
227  if (ii < 0 || ii >= num_vec)
228  SG_ERROR("Index out of range: should be 0<%d<%d\n", ii, num_vec)
229 
230  if (i == ii)
231  continue;
232 
233  sg_memcpy(&feature_matrix.matrix[int64_t(num_features) * i],
234  &feature_matrix.matrix[int64_t(num_features) * ii],
235  num_features * sizeof(ST));
236  old_ii = ii;
237  }
238 }
239 
240 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
241 {
242  if (m_subset_stack->has_subsets())
243  SG_ERROR("A subset is set, cannot call feature_subset\n")
244 
245  ASSERT(feature_matrix.matrix)
246  ASSERT(idx_len<=num_features)
247  int32_t num_feat = num_features;
248  num_features = idx_len;
249 
250  for (int32_t i = 0; i < num_vectors; i++)
251  {
252  ST* src = &feature_matrix.matrix[int64_t(num_feat) * i];
253  ST* dst = &feature_matrix.matrix[int64_t(num_features) * i];
254 
255  int32_t old_jj = -1;
256  for (int32_t j = 0; j < idx_len; j++)
257  {
258  int32_t jj = idx[j];
259  ASSERT(old_jj<jj)
260  if (jj < 0 || jj >= num_feat)
261  SG_ERROR(
262  "Index out of range: should be 0<%d<%d\n", jj, num_feat);
263 
264  dst[j] = src[jj];
265  old_jj = jj;
266  }
267  }
268 }
269 
270 template <class ST>
272 {
273  if (!m_subset_stack->has_subsets())
274  return feature_matrix;
275 
276  SGMatrix<ST> target(num_features, get_num_vectors());
277  copy_feature_matrix(target);
278  return target;
279 }
280 
281 template <class ST>
283 {
284  REQUIRE(column_offset>=0, "Column offset (%d) cannot be negative!\n", column_offset);
285  REQUIRE(!target.equals(feature_matrix), "Source and target feature matrices cannot be the same\n");
286 
287  index_t num_vecs=get_num_vectors();
288  index_t num_cols=num_vecs+column_offset;
289 
290  REQUIRE(target.matrix!=nullptr, "Provided matrix is not allocated!\n");
291  REQUIRE(target.num_rows==num_features,
292  "Number of rows of given matrix (%d) should be equal to the number of features (%d)!\n",
293  target.num_rows, num_features);
294  REQUIRE(target.num_cols>=num_cols,
295  "Number of cols of given matrix (%d) should be at least %d!\n",
296  target.num_cols, num_cols);
297 
298  if (!m_subset_stack->has_subsets())
299  {
300  auto src=feature_matrix.matrix;
301  auto dest=target.matrix+int64_t(num_features)*column_offset;
302  sg_memcpy(dest, src, feature_matrix.size()*sizeof(ST));
303  }
304  else
305  {
306  for (int32_t i=0; i<num_vecs; ++i)
307  {
308  auto real_i=m_subset_stack->subset_idx_conversion(i);
309  auto src=feature_matrix.matrix+real_i*int64_t(num_features);
310  auto dest=target.matrix+int64_t(num_features)*(column_offset+i);
311  sg_memcpy(dest, src, num_features*sizeof(ST));
312  }
313  }
314 }
315 
317 {
318  SGMatrix<ST> st_feature_matrix=feature_matrix;
319  m_subset_stack->remove_all_subsets();
320  SG_UNREF(feature_cache);
321  clean_preprocessors();
322  free_feature_matrix();
323  return st_feature_matrix;
324 }
325 
327 {
328  m_subset_stack->remove_all_subsets();
329  free_feature_matrix();
330  feature_matrix = matrix;
331  num_features = matrix.num_rows;
332  num_vectors = matrix.num_cols;
333 }
334 
335 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
336 {
337  num_feat = num_features;
338  num_vec = num_vectors;
339  return feature_matrix.matrix;
340 }
341 
343 {
344  int32_t num_feat;
345  int32_t num_vec;
346  ST* fm = get_transposed(num_feat, num_vec);
347 
348  return new CDenseFeatures<ST>(fm, num_feat, num_vec);
349 }
350 
351 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
352 {
353  num_feat = get_num_vectors();
354  num_vec = num_features;
355 
356  int32_t old_num_vec=get_num_vectors();
357 
358  ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
359 
360  for (int32_t i=0; i<old_num_vec; i++)
361  {
362  SGVector<ST> vec=get_feature_vector(i);
363 
364  for (int32_t j=0; j<vec.vlen; j++)
365  fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
366 
367  free_feature_vector(vec, i);
368  }
369 
370  return fm;
371 }
372 
374 {
375  m_subset_stack->remove_all_subsets();
376 
377  int32_t num_feat = df->get_dim_feature_space();
378  int32_t num_vec = df->get_num_vectors();
379 
380  ASSERT(num_feat>0 && num_vec>0)
381 
382  free_feature_matrix();
383  feature_matrix = SGMatrix<ST>(num_feat, num_vec);
384 
385  for (int32_t i = 0; i < num_vec; i++)
386  {
388  ASSERT(num_feat==v.vlen)
389 
390  for (int32_t j = 0; j < num_feat; j++)
391  feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
392  }
393  num_features = num_feat;
394  num_vectors = num_vec;
395 }
396 
397 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
398 {
399  if (m_subset_stack->has_subsets())
400  SG_ERROR("A subset is set, cannot call apply_preproc\n")
401 
402  SG_DEBUG("force: %d\n", force_preprocessing)
403 
404  if (feature_matrix.matrix && get_num_preprocessors())
405  {
406  for (int32_t i = 0; i < get_num_preprocessors(); i++)
407  {
408  if ((!is_preprocessed(i) || force_preprocessing))
409  {
410  set_preprocessed(i);
412  (CDensePreprocessor<ST>*) get_preprocessor(i);
413  SG_INFO("preprocessing using preproc %s\n", p->get_name())
414 
415  if (p->apply_to_feature_matrix(this).matrix == NULL)
416  {
417  SG_UNREF(p);
418  return false;
419  }
420  SG_UNREF(p);
421 
422  }
423  }
424 
425  return true;
426  }
427  else
428  {
429  if (!feature_matrix.matrix)
430  SG_ERROR("no feature matrix\n")
431 
432  if (!get_num_preprocessors())
433  SG_ERROR("no preprocessors available\n")
434 
435  return false;
436  }
437 }
438 
439 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
440 {
441  return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors;
442 }
443 
444 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() const { return num_features; }
445 
446 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num)
447 {
448  num_features = num;
449  initialize_cache();
450 }
451 
452 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num)
453 {
454  if (m_subset_stack->has_subsets())
455  SG_ERROR("A subset is set, cannot call set_num_vectors\n")
456 
457  num_vectors = num;
458  initialize_cache();
459 }
460 
461 template<class ST> void CDenseFeatures<ST>::initialize_cache()
462 {
463  if (m_subset_stack->has_subsets())
464  SG_ERROR("A subset is set, cannot call initialize_cache\n")
465 
466  if (num_features && num_vectors)
467  {
468  SG_UNREF(feature_cache);
469  feature_cache = new CCache<ST>(get_cache_size(), num_features,
470  num_vectors);
471  SG_REF(feature_cache);
472  }
473 }
474 
475 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; }
476 
477 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
478 {
479  if (m_subset_stack->has_subsets())
480  SG_ERROR("A subset is set, cannot call reshape\n")
481 
482  if (p_num_features * p_num_vectors
483  == this->num_features * this->num_vectors)
484  {
485  num_features = p_num_features;
486  num_vectors = p_num_vectors;
487  return true;
488  } else
489  return false;
490 }
491 
492 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; }
493 
494 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
495  int32_t vec_idx2)
496 {
497  ASSERT(df)
498  ASSERT(df->get_feature_type() == get_feature_type())
499  ASSERT(df->get_feature_class() == get_feature_class())
501 
502  int32_t len1, len2;
503  bool free1, free2;
504 
505  ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
506  ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
507 
508  float64_t result = CMath::dot(vec1, vec2, len1);
509 
510  free_feature_vector(vec1, vec_idx1, free1);
511  sf->free_feature_vector(vec2, vec_idx2, free2);
512 
513  return result;
514 }
515 
516 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
517  float64_t* vec2, int32_t vec2_len, bool abs_val)
518 {
519  ASSERT(vec2_len == num_features)
520 
521  int32_t vlen;
522  bool vfree;
523  ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
524 
525  ASSERT(vlen == num_features)
526 
527  if (abs_val)
528  {
529  for (int32_t i = 0; i < num_features; i++)
530  vec2[i] += alpha * CMath::abs(vec1[i]);
531  }
532  else
533  {
534  for (int32_t i = 0; i < num_features; i++)
535  vec2[i] += alpha * vec1[i];
536  }
537 
538  free_feature_vector(vec1, vec_idx1, vfree);
539 }
540 
541 template<>
543  float64_t* vec2, int32_t vec2_len, bool abs_val)
544 {
545  ASSERT(vec2_len == num_features)
546 
547  int32_t vlen;
548  bool vfree;
549  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
550 
551  ASSERT(vlen == num_features)
552 
553  if (abs_val)
554  {
555  for (int32_t i = 0; i < num_features; i++)
556  vec2[i] += alpha * CMath::abs(vec1[i]);
557  }
558  else
559  {
560  SGVector<float64_t>::vec1_plus_scalar_times_vec2(vec2, alpha, vec1, num_features);
561  }
562 
563  free_feature_vector(vec1, vec_idx1, vfree);
564 }
565 
566 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num)
567 {
568  return num_features;
569 }
570 
571 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
572 {
573  if (vector_index>=get_num_vectors())
574  {
575  SG_ERROR("Index out of bounds (number of vectors %d, you "
576  "requested %d)\n", get_num_vectors(), vector_index);
577  }
578 
579  dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
580  iterator->vec = get_feature_vector(vector_index, iterator->vlen,
581  iterator->vfree);
582  iterator->vidx = vector_index;
583  iterator->index = 0;
584  return iterator;
585 }
586 
587 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
588  void* iterator)
589 {
590  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
591  if (!it || it->index >= it->vlen)
592  return false;
593 
594  index = it->index++;
595  value = (float64_t) it->vec[index];
596 
597  return true;
598 }
599 
600 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator)
601 {
602  if (!iterator)
603  return;
604 
605  dense_feature_iterator* it = (dense_feature_iterator*) iterator;
606  free_feature_vector(it->vec, it->vidx, it->vfree);
607  SG_FREE(it);
608 }
609 
611 {
612  SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
613 
614  for (index_t i=0; i<indices.vlen; ++i)
615  {
616  index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]);
617  sg_memcpy(&feature_matrix_copy.matrix[i*num_features],
618  &feature_matrix.matrix[real_idx*num_features],
619  num_features*sizeof(ST));
620  }
621 
622  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
623  SG_REF(result);
624  return result;
625 }
626 
627 template<class ST>
629 {
630  SG_DEBUG("Entering!\n");
631 
632  // sanity checks
633  index_t max=CMath::max(dims.vector, dims.vlen);
634  index_t min=CMath::min(dims.vector, dims.vlen);
635  REQUIRE(max<num_features && min>=0,
636  "Provided dimensions is in the range [%d, %d] but they "
637  "have to be within [0, %d]! But it \n", min, max, num_features);
638 
639  SGMatrix<ST> feature_matrix_copy(dims.vlen, get_num_vectors());
640 
641  for (index_t i=0; i<dims.vlen; ++i)
642  {
643  for (index_t j=0; j<get_num_vectors(); ++j)
644  {
645  index_t real_idx=m_subset_stack->subset_idx_conversion(j);
646  feature_matrix_copy(i, j)=feature_matrix(dims[i], real_idx);
647  }
648  }
649 
650  CFeatures* result=new CDenseFeatures(feature_matrix_copy);
651  SG_REF(result);
652 
653  SG_DEBUG("Leaving!\n");
654  return result;
655 }
656 
657 template<class ST>
659 {
660  CFeatures* shallow_copy_features=NULL;
661 
662  SG_SDEBUG("Using underlying feature matrix with %d dimensions and %d feature vectors!\n", num_features, num_vectors);
663  SGMatrix<ST> shallow_copy_matrix(feature_matrix);
664  shallow_copy_features=new CDenseFeatures<ST>(shallow_copy_matrix);
665  SG_REF(shallow_copy_features);
666  if (m_subset_stack->has_subsets())
667  shallow_copy_features->add_subset(m_subset_stack->get_last_subset()->get_subset_idx());
668 
669  return shallow_copy_features;
670 }
671 
672 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
673  ST* target)
674 {
676  len = 0;
677  return NULL;
678 }
679 
680 template<class ST> void CDenseFeatures<ST>::init()
681 {
682  num_vectors = 0;
683  num_features = 0;
684 
685  feature_matrix = SGMatrix<ST>();
686  feature_cache = NULL;
687 
688  set_generic<ST>();
689 
690  /* not store number of vectors in subset */
691  SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE);
692  SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE);
693  SG_ADD(&feature_matrix, "feature_matrix",
694  "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE);
695 }
696 
697 #define GET_FEATURE_TYPE(f_type, sg_type) \
698 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \
699 { \
700  return f_type; \
701 }
702 
705 GET_FEATURE_TYPE(F_BYTE, uint8_t)
706 GET_FEATURE_TYPE(F_BYTE, int8_t)
707 GET_FEATURE_TYPE(F_SHORT, int16_t)
708 GET_FEATURE_TYPE(F_WORD, uint16_t)
709 GET_FEATURE_TYPE(F_INT, int32_t)
710 GET_FEATURE_TYPE(F_UINT, uint32_t)
711 GET_FEATURE_TYPE(F_LONG, int64_t)
712 GET_FEATURE_TYPE(F_ULONG, uint64_t)
716 #undef GET_FEATURE_TYPE
717 
718 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1,
719  const float64_t* vec2, int32_t vec2_len)
720 {
721  ASSERT(vec2_len == num_features)
722 
723  int32_t vlen;
724  bool vfree;
725  bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
726 
727  ASSERT(vlen == num_features)
728  float64_t result = 0;
729 
730  for (int32_t i = 0; i < num_features; i++)
731  result += vec1[i] ? vec2[i] : 0;
732 
733  free_feature_vector(vec1, vec_idx1, vfree);
734 
735  return result;
736 }
737 
738 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1,
739  const float64_t* vec2, int32_t vec2_len)
740 {
741  ASSERT(vec2_len == num_features)
742 
743  int32_t vlen;
744  bool vfree;
745  char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
746 
747  ASSERT(vlen == num_features)
748  float64_t result = 0;
749 
750  for (int32_t i = 0; i < num_features; i++)
751  result += vec1[i] * vec2[i];
752 
753  free_feature_vector(vec1, vec_idx1, vfree);
754 
755  return result;
756 }
757 
758 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1,
759  const float64_t* vec2, int32_t vec2_len)
760 {
761  ASSERT(vec2_len == num_features)
762 
763  int32_t vlen;
764  bool vfree;
765  int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
766 
767  ASSERT(vlen == num_features)
768  float64_t result = 0;
769 
770  for (int32_t i = 0; i < num_features; i++)
771  result += vec1[i] * vec2[i];
772 
773  free_feature_vector(vec1, vec_idx1, vfree);
774 
775  return result;
776 }
777 
779  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
780 {
781  ASSERT(vec2_len == num_features)
782 
783  int32_t vlen;
784  bool vfree;
785  uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
786 
787  ASSERT(vlen == num_features)
788  float64_t result = 0;
789 
790  for (int32_t i = 0; i < num_features; i++)
791  result += vec1[i] * vec2[i];
792 
793  free_feature_vector(vec1, vec_idx1, vfree);
794 
795  return result;
796 }
797 
799  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
800 {
801  ASSERT(vec2_len == num_features)
802 
803  int32_t vlen;
804  bool vfree;
805  int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
806 
807  ASSERT(vlen == num_features)
808  float64_t result = 0;
809 
810  for (int32_t i = 0; i < num_features; i++)
811  result += vec1[i] * vec2[i];
812 
813  free_feature_vector(vec1, vec_idx1, vfree);
814 
815  return result;
816 }
817 
819  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
820 {
821  ASSERT(vec2_len == num_features)
822 
823  int32_t vlen;
824  bool vfree;
825  uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
826 
827  ASSERT(vlen == num_features)
828  float64_t result = 0;
829 
830  for (int32_t i = 0; i < num_features; i++)
831  result += vec1[i] * vec2[i];
832 
833  free_feature_vector(vec1, vec_idx1, vfree);
834 
835  return result;
836 }
837 
839  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
840 {
841  ASSERT(vec2_len == num_features)
842 
843  int32_t vlen;
844  bool vfree;
845  int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
846 
847  ASSERT(vlen == num_features)
848  float64_t result = 0;
849 
850  for (int32_t i = 0; i < num_features; i++)
851  result += vec1[i] * vec2[i];
852 
853  free_feature_vector(vec1, vec_idx1, vfree);
854 
855  return result;
856 }
857 
859  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
860 {
861  ASSERT(vec2_len == num_features)
862 
863  int32_t vlen;
864  bool vfree;
865  uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
866 
867  ASSERT(vlen == num_features)
868  float64_t result = 0;
869 
870  for (int32_t i = 0; i < num_features; i++)
871  result += vec1[i] * vec2[i];
872 
873  free_feature_vector(vec1, vec_idx1, vfree);
874 
875  return result;
876 }
877 
879  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
880 {
881  ASSERT(vec2_len == num_features)
882 
883  int32_t vlen;
884  bool vfree;
885  int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
886 
887  ASSERT(vlen == num_features)
888  float64_t result = 0;
889 
890  for (int32_t i = 0; i < num_features; i++)
891  result += vec1[i] * vec2[i];
892 
893  free_feature_vector(vec1, vec_idx1, vfree);
894 
895  return result;
896 }
897 
899  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
900 {
901  ASSERT(vec2_len == num_features)
902 
903  int32_t vlen;
904  bool vfree;
905  uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
906 
907  ASSERT(vlen == num_features)
908  float64_t result = 0;
909 
910  for (int32_t i = 0; i < num_features; i++)
911  result += vec1[i] * vec2[i];
912 
913  free_feature_vector(vec1, vec_idx1, vfree);
914 
915  return result;
916 }
917 
919  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
920 {
921  ASSERT(vec2_len == num_features)
922 
923  int32_t vlen;
924  bool vfree;
925  float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
926 
927  ASSERT(vlen == num_features)
928  float64_t result = 0;
929 
930  for (int32_t i = 0; i < num_features; i++)
931  result += vec1[i] * vec2[i];
932 
933  free_feature_vector(vec1, vec_idx1, vfree);
934 
935  return result;
936 }
937 
939  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
940 {
941  ASSERT(vec2_len == num_features)
942 
943  int32_t vlen;
944  bool vfree;
945  float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
946 
947  ASSERT(vlen == num_features)
948  float64_t result = CMath::dot(vec1, vec2, num_features);
949 
950  free_feature_vector(vec1, vec_idx1, vfree);
951 
952  return result;
953 }
954 
956  int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
957 {
958  ASSERT(vec2_len == num_features)
959 
960  int32_t vlen;
961  bool vfree;
962  floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
963 
964  ASSERT(vlen == num_features)
965  float64_t result = 0;
966 
967  for (int32_t i = 0; i < num_features; i++)
968  result += vec1[i] * vec2[i];
969 
970  free_feature_vector(vec1, vec_idx1, vfree);
971 
972  return result;
973 }
974 
975 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
976 {
977  if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors )
978  return false;
979 
980  ST* vec1;
981  ST* vec2;
982  int32_t v1len, v2len;
983  bool v1free, v2free, stop = false;
984 
985  for (int32_t i = 0; i < num_vectors; i++)
986  {
987  vec1 = get_feature_vector(i, v1len, v1free);
988  vec2 = rhs->get_feature_vector(i, v2len, v2free);
989 
990  if (v1len!=v2len)
991  stop = true;
992 
993  for (int32_t j=0; j<v1len; j++)
994  {
995  if (vec1[j]!=vec2[j])
996  stop = true;
997  }
998 
999  free_feature_vector(vec1, i, v1free);
1000  free_feature_vector(vec2, i, v2free);
1001 
1002  if (stop)
1003  return false;
1004  }
1005 
1006  return true;
1007 }
1008 
1009 template <class ST>
1011 {
1012  SG_DEBUG("Entering.\n");
1013 
1014  REQUIRE(others!=nullptr, "The list of other feature instances is not initialized!\n");
1015 
1016  auto current=others->get_first_element();
1017  auto total_num_vectors=get_num_vectors();
1018  auto unref_required=others->get_delete_data();
1019 
1020  while (current!=nullptr)
1021  {
1022  auto casted=dynamic_cast<CDenseFeatures<ST>*>(current);
1023 
1024  REQUIRE(casted!=nullptr, "Provided object's type (%s) must match own type (%s)!\n",
1025  current->get_name(), get_name());
1026  REQUIRE(num_features==casted->num_features,
1027  "Provided feature object has different dimension (%d) than this one (%d)!\n",
1028  casted->num_features, num_features);
1029 
1030  total_num_vectors+=casted->get_num_vectors();
1031 
1032  if (unref_required)
1033  SG_UNREF(current);
1034 
1035  current=others->get_next_element();
1036  }
1037 
1038  SGMatrix<ST> data(num_features, total_num_vectors);
1039  index_t num_copied=0;
1040  copy_feature_matrix(data, num_copied);
1041  num_copied+=get_num_vectors();
1042 
1043  current=others->get_first_element();
1044 
1045  while (current!=nullptr)
1046  {
1047  auto casted=static_cast<CDenseFeatures<ST>*>(current);
1048  casted->copy_feature_matrix(data, num_copied);
1049  num_copied+=casted->get_num_vectors();
1050 
1051  if (unref_required)
1052  SG_UNREF(current);
1053 
1054  current=others->get_next_element();
1055  }
1056 
1057  auto result=new CDenseFeatures<ST>(data);
1058 
1059  SG_DEBUG("Leaving.\n");
1060  return result;
1061 }
1062 
1063 template <class ST>
1065 {
1066  auto list=some<CList>();
1067  list->append_element(other);
1068  return create_merged_copy(list);
1069 }
1070 
1071 template<class ST>
1073 {
1074  SGMatrix<ST> matrix;
1075  matrix.load(loader);
1076  set_feature_matrix(matrix);
1077 }
1078 
1079 template<class ST>
1081 {
1082  feature_matrix.save(writer);
1083 }
1084 
1086 {
1087  REQUIRE(base_features->get_feature_class() == C_DENSE,
1088  "base_features must be of dynamic type CDenseFeatures\n")
1089 
1090  return (CDenseFeatures< ST >*) base_features;
1091 }
1092 
1093 template class CDenseFeatures<bool>;
1094 template class CDenseFeatures<char>;
1095 template class CDenseFeatures<int8_t>;
1096 template class CDenseFeatures<uint8_t>;
1097 template class CDenseFeatures<int16_t>;
1098 template class CDenseFeatures<uint16_t>;
1099 template class CDenseFeatures<int32_t>;
1100 template class CDenseFeatures<uint32_t>;
1101 template class CDenseFeatures<int64_t>;
1102 template class CDenseFeatures<uint64_t>;
1103 template class CDenseFeatures<float32_t>;
1104 template class CDenseFeatures<float64_t>;
1105 template class CDenseFeatures<floatmax_t>;
1106 }
virtual const char * get_name() const =0
CSubsetStack * m_subset_stack
Definition: Features.h:361
void set_feature_vector(SGVector< ST > vector, int32_t num)
virtual int32_t get_dim_feature_space() const
#define SG_INFO(...)
Definition: SGIO.h:117
CSGObject * get_next_element()
Definition: List.h:185
The class DenseFeatures implements dense feature matrices.
Definition: LDA.h:40
virtual void load(CFile *loader)
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
int32_t get_num_features() const
int32_t index_t
Definition: common.h:72
CDenseFeatures(int32_t size=0)
virtual CFeatures * duplicate() const
SGMatrix< ST > get_feature_matrix()
void set_feature_matrix(SGMatrix< ST > matrix)
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:128
#define REQUIRE(x,...)
Definition: SGIO.h:205
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:138
T dot(const SGVector< T > &a, const SGVector< T > &b)
index_t num_cols
Definition: SGMatrix.h:465
void vector_subset(int32_t *idx, int32_t idx_len)
bool get_delete_data()
Definition: List.h:575
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:52
int32_t num_features
number of features in cache
index_t num_rows
Definition: SGMatrix.h:463
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
class to add subset support to another class. A CSubsetStackStack instance should be added and wrappe...
Definition: SubsetStack.h:37
virtual int32_t get_dim_feature_space() const =0
virtual void save(CFile *saver)
index_t vlen
Definition: SGVector.h:545
CSGObject * get_first_element()
Definition: List.h:151
#define ASSERT(x)
Definition: SGIO.h:200
virtual int32_t get_num_vectors() const
Template class DensePreprocessor, base class for preprocessors (cf. CPreprocessor) that apply to CDen...
shogun vector
double float64_t
Definition: common.h:60
void set_num_vectors(int32_t num)
long double floatmax_t
Definition: common.h:61
virtual float64_t dot(int32_t vec_idx1, CDotFeatures *df, int32_t vec_idx2)
A File access base class.
Definition: File.h:34
bool equals(const SGMatrix< T > &other) const
Definition: SGMatrix.cpp:144
CDenseFeatures< ST > * get_transposed()
void copy_feature_matrix(SGMatrix< ST > target, index_t column_offset=0) const
virtual EFeatureClass get_feature_class() const =0
void set_num_features(int32_t num)
SGMatrix< ST > feature_matrix
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:622
float float32_t
Definition: common.h:59
void feature_subset(int32_t *idx, int32_t idx_len)
#define SG_UNREF(x)
Definition: SGObject.h:53
#define SG_DEBUG(...)
Definition: SGIO.h:106
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define SG_SDEBUG(...)
Definition: SGIO.h:167
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
virtual bool reshape(int32_t p_num_features, int32_t p_num_vectors)
virtual EFeatureClass get_feature_class() const
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual SGMatrix< ST > apply_to_feature_matrix(CFeatures *features)=0
void obtain_from_dot(CDotFeatures *df)
int32_t num_vectors
number of vectors in cache
SGVector< float64_t > get_computed_dot_feature_vector(int32_t num)
virtual bool apply_preprocessor(bool force_preprocessing=false)
#define SG_ADD(...)
Definition: SGObject.h:94
T max(const Container< T > &a)
SGMatrix< ST > steal_feature_matrix()
virtual SGVector< ST > apply_to_feature_vector(SGVector< ST > vector)=0
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
virtual void add_subset(SGVector< index_t > subset)
Definition: Features.cpp:310
virtual EFeatureType get_feature_type() const =0
Class List implements a doubly connected list for low-level-objects.
Definition: List.h:84
#define GET_FEATURE_TYPE(f_type, sg_type)
static T abs(T a)
Definition: Math.h:175
void load(CFile *loader)
Definition: SGMatrix.cpp:1150

SHOGUN Machine Learning Toolbox - Documentation