SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SGSparseMatrix.cpp
Go to the documentation of this file.
1 #include <shogun/lib/SGMatrix.h>
4 #include <shogun/io/File.h>
5 #include <shogun/io/SGIO.h>
6 #include <shogun/io/LibSVMFile.h>
7 
8 namespace shogun {
9 
10 template <class T>
// NOTE(review): the signature line (listing line 11) was lost in the HTML
// extraction; from the body this is presumably the parameterless constructor,
// which puts the object into the empty state via init_data() — confirm
// against the original SGSparseMatrix.cpp.
12 {
13  init_data();
14 }
15 
16 template <class T>
// Wrapping constructor: adopts a caller-supplied array of sparse vectors.
// NOTE(review): the first line of the signature (listing line 17) is missing;
// judging by the initializer list it takes (SGSparseVector<T>* vecs,
// index_t num_feat, index_t num_vec, bool ref_counting) — confirm.
18  index_t num_vec, bool ref_counting) :
19  SGReferencedData(ref_counting),
20  num_vectors(num_vec), num_features(num_feat),
// The array is adopted as-is (no copy); lifetime is governed by the
// SGReferencedData ref-counting machinery.
21  sparse_matrix(vecs)
22 {
23 }
24 
25 template <class T>
26 SGSparseMatrix<T>::SGSparseMatrix(index_t num_feat, index_t num_vec, bool ref_counting) :
27  SGReferencedData(ref_counting),
28  num_vectors(num_vec), num_features(num_feat)
29 {
31 }
32 
33 template <class T>
// Conversion constructor from a dense SGMatrix<T>; the signature line
// (listing line 34) was lost in extraction — presumably
// SGSparseMatrix<T>::SGSparseMatrix(SGMatrix<T> dense). Delegates the
// dense-to-sparse conversion to from_dense().
35 {
36  from_dense(dense);
37 }
38 
39 template <class T>
// Copy constructor (signature, listing line 40, lost in extraction).
// Performs a shallow, ref-counted copy via copy_data(); no element data is
// duplicated.
41 {
42  copy_data(orig);
43 }
44 
45 template <class T>
// Destructor (signature, listing line 46, lost in extraction). unref()
// decrements the SGReferencedData reference count and releases the payload
// once it reaches zero.
47 {
48  unref();
49 }
50 
51 template <> template <>
// Member-template specialization: complex sparse matrix times a real
// (float64) dense vector. The first line of the signature (listing line 52)
// is missing — presumably SGVector<complex128_t>
// SGSparseMatrix<complex128_t>::apply(...) — confirm against the header.
53  SGVector<float64_t> v) const
54 {
55  SGVector<complex128_t> result(num_vectors);
// The input must have one entry per feature (column dimension of each row).
56  REQUIRE(v.vlen==num_features,
57  "Dimension mismatch! %d vs %d\n",
58  v.vlen, num_features);
// result[i] = dot product of sparse row i with the dense input vector.
59  for (index_t i=0; i<num_vectors; ++i)
60  result[i]=sparse_matrix[i].dense_dot(v);
61  return result;
62 }
63 
64 template <> template <>
// Member-template specialization: complex sparse matrix times an int32 dense
// vector. The first line of the signature (listing line 65) is missing —
// presumably SGVector<complex128_t> SGSparseMatrix<complex128_t>::apply(...)
// — confirm against the header.
66  SGVector<int32_t> v) const
67 {
68  SGVector<complex128_t> result(num_vectors);
// Input length must equal the feature count.
69  REQUIRE(v.vlen==num_features,
70  "Dimension mismatch! %d vs %d\n",
71  v.vlen, num_features);
// result[i] = dot product of sparse row i with the dense input vector.
72  for (index_t i=0; i<num_vectors; ++i)
73  result[i]=sparse_matrix[i].dense_dot(v);
74  return result;
75 }
76 
77 template <> template <>
// Member-template specialization: float64 sparse matrix times an int32 dense
// vector. The first line of the signature (listing line 78) is missing —
// presumably SGVector<float64_t> SGSparseMatrix<float64_t>::apply(...) —
// confirm against the header.
79  SGVector<int32_t> v) const
80 {
81  SGVector<float64_t> result(num_vectors);
// Input length must equal the feature count.
82  REQUIRE(v.vlen==num_features,
83  "Dimension mismatch! %d vs %d\n",
84  v.vlen, num_features);
// result[i] = dot product of sparse row i with the dense input vector.
85  for (index_t i=0; i<num_vectors; ++i)
86  result[i]=sparse_matrix[i].dense_dot(v);
87  return result;
88 }
89 
90 template<class T>
// Reads matrix content from a CFile-style loader (signature, listing line
// 91, lost in extraction). Listing lines 96/98 are also missing — in the
// original these are presumably the SG_SET_LOCALE_C / SG_RESET_LOCALE guards
// around the read — confirm against the repository source.
92 {
93  ASSERT(loader)
// Release whatever this object currently references before the loader
// overwrites sparse_matrix/num_features/num_vectors below.
94  unref();
95 
97  loader->get_sparse_matrix(sparse_matrix, num_features, num_vectors);
99 }
100 
101 template<>
// Specialization for complex128_t (signature, listing line 102, lost in
// extraction): file loading of complex sparse matrices is unsupported, so
// this unconditionally raises a Shogun error.
103 {
104  SG_SERROR("SGSparseMatrix::load():: Not supported for complex128_t");
105 }
106 
107 template<class T> SGVector<float64_t> SGSparseMatrix<T>::load_with_labels(CLibSVMFile* file, bool do_sort_features)
108 {
109  ASSERT(file)
110 
111  float64_t* raw_labels;
112  file->get_sparse_matrix(sparse_matrix, num_features, num_vectors,
113  raw_labels, true);
114 
115  SGVector<float64_t> labels(raw_labels, num_vectors);
116 
117  if (do_sort_features)
118  sort_features();
119 
120  return labels;
121 }
122 
124 
125 
126 template<class T>
// Writes the matrix to a CFile-style saver (signature, listing line 127,
// lost in extraction; listing lines 131/133 — presumably the locale guards
// mirroring load() — are missing too; confirm against the repository source).
128 {
129  ASSERT(saver)
130 
132  saver->set_sparse_matrix(sparse_matrix, num_features, num_vectors);
134 }
135 
136 template<>
// Specialization for complex128_t (signature, listing line 137, lost in
// extraction): file saving of complex sparse matrices is unsupported, so
// this unconditionally raises a Shogun error.
138 {
139  SG_SERROR("SGSparseMatrix::save():: Not supported for complex128_t");
140 }
141 
// Saves the matrix plus one float64 label per vector (the first line of the
// signature, listing line 142, was lost in extraction — presumably
// save_with_labels(CLibSVMFile* file, ...)).
143  SGVector<float64_t> labels)
144 {
145  ASSERT(file)
// There must be exactly one label per stored vector.
146  int32_t num=labels.vlen;
147  ASSERT(num>0)
148  ASSERT(num==num_vectors)
149 
// Borrow the label buffer; ownership stays with the SGVector argument.
150  float64_t* raw_labels=labels.vector;
151  file->set_sparse_matrix(sparse_matrix, num_features, num_vectors,
152  raw_labels);
153 }
154 
156 
157 
158 template <class T>
// copy_data (signature, listing line 159, lost in extraction): shallow-copies
// the buffer pointer and dimensions from another instance; invoked by the
// SGReferencedData copy machinery, which manages the shared buffer's count.
// NOTE(review): the C-style casts presumably strip const/convert from a
// 'const SGReferencedData& orig' parameter — a named-cast chain would be
// preferable, but the (unseen) signature is needed to confirm.
160 {
161  sparse_matrix = ((SGSparseMatrix*)(&orig))->sparse_matrix;
162  num_vectors = ((SGSparseMatrix*)(&orig))->num_vectors;
163  num_features = ((SGSparseMatrix*)(&orig))->num_features;
164 }
165 
166 template <class T>
// init_data (signature, listing line 167, lost in extraction): puts the
// object into the well-defined empty state — no buffer, zero dimensions.
168 {
169  sparse_matrix = NULL;
170  num_vectors = 0;
171  num_features = 0;
172 }
173 
174 template <class T>
// free_data (signature, listing line 175, lost in extraction): releases the
// vector array and resets the dimensions. NOTE(review): whether the element
// SGSparseVector destructors run depends on Shogun's SG_FREE specialization
// for this type — confirm it is the destructor-aware variant.
176 {
177  SG_FREE(sparse_matrix);
178  num_vectors = 0;
179  num_features = 0;
180 }
181 
// Transpose builder (the signature, listing line 182, was lost in extraction
// — presumably SGSparseMatrix<T> SGSparseMatrix<T>::get_transposed()).
// Two-pass algorithm: first histogram the entries per feature index to size
// each transposed row exactly, then scatter the entries using per-row write
// cursors.
183 {
// Dimensions swap: the transpose has num_vectors features and num_features
// vectors (ctor argument order is (num_feat, num_vec)).
184  SGSparseMatrix<T> sfm(num_vectors, num_features);
185 
186  int32_t* hist=SG_CALLOC(int32_t, num_features);
187 
188  // count the lengths of future feature vectors
189  for (int32_t v=0; v<num_vectors; v++)
190  {
191  SGSparseVector<T> sv=sparse_matrix[v];
192 
193  for (int32_t i=0; i<sv.num_feat_entries; i++)
194  hist[sv.features[i].feat_index]++;
195  }
196 
// Allocate each transposed row with its exact entry count.
197  for (int32_t v=0; v<num_features; v++)
198  sfm[v]=SGSparseVector<T>(hist[v]);
199 
200  SG_FREE(hist);
201 
// BUG(review): 'index' holds one write cursor per TRANSPOSED row and is
// indexed below by feat_index, which ranges over [0, num_features). It must
// be allocated with num_features entries; using num_vectors overruns the
// buffer whenever num_features > num_vectors. Fix:
//   int32_t* index=SG_CALLOC(int32_t, num_features);
202  int32_t* index=SG_CALLOC(int32_t, num_vectors);
203 
204  // fill future feature vectors with content
205  for (int32_t v=0; v<num_vectors; v++)
206  {
207  SGSparseVector<T> sv=sparse_matrix[v];
208 
209  for (int32_t i=0; i<sv.num_feat_entries; i++)
210  {
// Entry (v, feat_index) of this matrix becomes entry (feat_index, v) of the
// transpose; index[vidx] is the next free slot in transposed row vidx.
211  int32_t vidx=sv.features[i].feat_index;
212  int32_t fidx=v;
213  sfm[vidx].features[index[vidx]].feat_index=fidx;
214  sfm[vidx].features[index[vidx]].entry=sv.features[i].entry;
215  index[vidx]++;
216  }
217  }
218 
219  SG_FREE(index);
220  return sfm;
221 }
222 
223 
224 template<class T> void SGSparseMatrix<T>::sort_features()
225 {
226  for (int32_t i=0; i<num_vectors; i++)
227  {
228  sparse_matrix[i].sort_features();
229  }
230 }
231 
232 template<class T> void SGSparseMatrix<T>::from_dense(SGMatrix<T> full)
233 {
234  T* src=full.matrix;
235  int32_t num_feat=full.num_rows;
236  int32_t num_vec=full.num_cols;
237 
238  REQUIRE(num_vec>0, "Matrix should have > 0 vectors!\n");
239 
240  SG_SINFO("converting dense feature matrix to sparse one\n")
241  int32_t* num_feat_entries=SG_MALLOC(int, num_vec);
242 
243 
244  int64_t num_total_entries=0;
245 
246  // count nr of non sparse features
247  for (int32_t i=0; i<num_vec; i++)
248  {
249  num_feat_entries[i]=0;
250  for (int32_t j=0; j<num_feat; j++)
251  {
252  if (src[i*((int64_t) num_feat) + j] != static_cast<T>(0))
253  num_feat_entries[i]++;
254  }
255  }
256 
257  num_features=num_feat;
258  num_vectors=num_vec;
259  sparse_matrix=SG_MALLOC(SGSparseVector<T>,num_vec);
260 
261  for (int32_t i=0; i< num_vec; i++)
262  {
263  sparse_matrix[i]=SGSparseVector<T>(num_feat_entries[i]);
264  int32_t sparse_feat_idx=0;
265 
266  for (int32_t j=0; j< num_feat; j++)
267  {
268  int64_t pos= i*num_feat + j;
269 
270  if (src[pos] != static_cast<T>(0))
271  {
272  sparse_matrix[i].features[sparse_feat_idx].entry=src[pos];
273  sparse_matrix[i].features[sparse_feat_idx].feat_index=j;
274  sparse_feat_idx++;
275  num_total_entries++;
276  }
277  }
278  }
279 
280  SG_SINFO("sparse feature matrix has %ld entries (full matrix had %ld, sparsity %2.2f%%)\n",
281  num_total_entries, int64_t(num_feat)*num_vec, (100.0*num_total_entries)/(int64_t(num_feat)*num_vec));
282  SG_FREE(num_feat_entries);
283 }
284 
// Explicit instantiations for every element type the library supports, so
// the template member definitions can live in this translation unit instead
// of the public header.
285 template class SGSparseMatrix<bool>;
286 template class SGSparseMatrix<char>;
287 template class SGSparseMatrix<int8_t>;
288 template class SGSparseMatrix<uint8_t>;
289 template class SGSparseMatrix<int16_t>;
290 template class SGSparseMatrix<uint16_t>;
291 template class SGSparseMatrix<int32_t>;
292 template class SGSparseMatrix<uint32_t>;
293 template class SGSparseMatrix<int64_t>;
294 template class SGSparseMatrix<uint64_t>;
295 template class SGSparseMatrix<float32_t>;
296 template class SGSparseMatrix<float64_t>;
297 template class SGSparseMatrix<floatmax_t>;
298 template class SGSparseMatrix<complex128_t>;
299 }

SHOGUN Machine Learning Toolbox - Documentation