SHOGUN  4.1.0
LeastAngleRegression.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Chiyuan Zhang
8  * Copyright (C) 2012 Chiyuan Zhang
9  */
10
11 #include <shogun/lib/config.h>
12
13 #ifdef HAVE_LAPACK
14
15 #include <vector>
16 #include <limits>
17 #include <algorithm>
18
24
25 using namespace shogun;
26 using namespace std;
27
28 static vector<float64_t> make_vector(int32_t size, float64_t val)
29 {
30  vector<float64_t> result(size);
31  fill(result.begin(), result.end(), val);
32  return result;
33 }
34
35 static void plane_rot(float64_t x0, float64_t x1,
37 {
38  G.zero();
39  if (x1 == 0)
40  {
41  G(0, 0) = G(1, 1) = 1;
42  y0 = x0;
43  y1 = x1;
44  }
45  else
46  {
47  float64_t r = CMath::sqrt(x0*x0+x1*x1);
48  float64_t sx0 = x0 / r;
49  float64_t sx1 = x1 / r;
50
51  G(0,0) = sx0;
52  G(1,0) = -sx1;
53  G(0,1) = sx1;
54  G(1,1) = sx0;
55
56  y0 = r;
57  y1 = 0;
58  }
59 }
60
61 static void find_max_abs(const vector<float64_t> &vec, const vector<bool> &ignore_mask,
62  int32_t &imax, float64_t& vmax)
63 {
64  imax = -1;
65  vmax = -1;
66  for (uint32_t i=0; i < vec.size(); ++i)
67  {
69  continue;
70
71  if (CMath::abs(vec[i]) > vmax)
72  {
73  vmax = CMath::abs(vec[i]);
74  imax = i;
75  }
76  }
77 }
78
80  CLinearMachine(), m_lasso(lasso),
81  m_max_nonz(0), m_max_l1_norm(0)
82 {
83  SG_ADD(&m_max_nonz, "max_nonz", "Max number of non-zero variables", MS_AVAILABLE);
84  SG_ADD(&m_max_l1_norm, "max_l1_norm", "Max l1-norm of estimator", MS_AVAILABLE);
85 }
86
88 {
89
90 }
91
93 {
94  if (!m_labels)
95  SG_ERROR("No labels set\n")
97  SG_ERROR("Expected RegressionLabels\n")
98
99  if (!data)
100  data=features;
101
102  if (!data)
103  SG_ERROR("No features set\n")
104
105  if (m_labels->get_num_labels() != data->get_num_vectors())
106  SG_ERROR("Number of training vectors does not match number of labels\n")
107
108  if (data->get_feature_class() != C_DENSE)
109  SG_ERROR("Expected Simple Features\n")
110
111  if (data->get_feature_type() != F_DREAL)
112  SG_ERROR("Expected Real Features\n")
113
115  int32_t n_fea = feats->get_num_features();
116  int32_t n_vec = feats->get_num_vectors();
117
118  bool lasso_cond = false;
119  bool stop_cond = false;
120
121  // init facilities
122  m_beta_idx.clear();
123  m_beta_path.clear();
124  m_num_active = 0;
125  m_active_set.clear();
126  m_is_active.resize(n_fea);
127  fill(m_is_active.begin(), m_is_active.end(), false);
128
129  SGVector<float64_t> y = ((CRegressionLabels*) m_labels)->get_labels();
130  SGMatrix<float64_t> Xorig = feats->get_feature_matrix();
131
132  // transpose(X) is more convenient to work with since we care
133  // about features here. After transpose, each row will be a data
134  // point while each column corresponds to a feature
135  SGMatrix<float64_t> X(n_vec, n_fea, true);
136  for (int32_t i=0; i < n_vec; ++i)
137  {
138  for (int32_t j=0; j < n_fea; ++j)
139  X(i,j) = Xorig(j,i);
140  }
141
142  // beta is the estimator
143  vector<float64_t> beta = make_vector(n_fea, 0);
144
145  vector<float64_t> Xy = make_vector(n_fea, 0);
146  // Xy = X' * y
147  cblas_dgemv(CblasColMajor, CblasTrans, n_vec, n_fea, 1, X.matrix, n_vec,
148  y.vector, 1, 0, &Xy[0], 1);
149
150  // mu is the prediction
151  vector<float64_t> mu = make_vector(n_vec, 0);
152
153  // correlation
154  vector<float64_t> corr = make_vector(n_fea, 0);
155  // sign of correlation
156  vector<float64_t> corr_sign(n_fea);
157
158  // Cholesky factorization R'R = X'X, R is upper triangular
160
161  float64_t max_corr = 1;
162  int32_t i_max_corr = 1;
163
164  // first entry: all coefficients are zero
165  m_beta_path.push_back(beta);
166  m_beta_idx.push_back(0);
167
168  //maximum allowed active variables at a time
169  int32_t max_active_allowed = CMath::min(n_vec-1, n_fea);
170
171  //========================================
172  // main loop
173  //========================================
174  int32_t nloop=0;
175  while (m_num_active < max_active_allowed && max_corr > CMath::MACHINE_EPSILON && !stop_cond)
176  {
177  // corr = X' * (y-mu) = - X'*mu + Xy
178  copy(Xy.begin(), Xy.end(), corr.begin());
179  cblas_dgemv(CblasColMajor, CblasTrans, n_vec, n_fea, -1,
180  X.matrix, n_vec, &mu[0], 1, 1, &corr[0], 1);
181
182  // corr_sign = sign(corr)
183  for (uint32_t i=0; i < corr.size(); ++i)
184  corr_sign[i] = CMath::sign(corr[i]);
185
186  // find max absolute correlation in inactive set
187  find_max_abs(corr, m_is_active, i_max_corr, max_corr);
188
189  if (!lasso_cond)
190  {
191  // update Cholesky factorization matrix
192  R=cholesky_insert(X, R, i_max_corr);
193  activate_variable(i_max_corr);
194  }
195
196  // corr_sign_a = corr_sign[m_active_set]
197  vector<float64_t> corr_sign_a(m_num_active);
198  for (int32_t i=0; i < m_num_active; ++i)
199  corr_sign_a[i] = corr_sign[m_active_set[i]];
200
201  // GA1 = R\(R'\corr_sign_a)
202  vector<float64_t> GA1(corr_sign_a);
203  cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit,
204  m_num_active, 1, 1, R.matrix, m_num_active, &GA1[0], m_num_active);
205  cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit,
206  m_num_active, 1, 1, R.matrix, m_num_active, &GA1[0], m_num_active);
207
208  // AA = 1/sqrt(GA1' * corr_sign_a)
209  float64_t AA = cblas_ddot(m_num_active, &GA1[0], 1, &corr_sign_a[0], 1);
210  AA = 1/CMath::sqrt(AA);
211
212  // wA = AA*GA1
213  vector<float64_t> wA(GA1);
214  for (int32_t i=0; i < m_num_active; ++i)
215  wA[i] *= AA;
216
217  // equiangular direction (unit vector)
218  vector<float64_t> u = make_vector(n_vec, 0);
219  // u = X[:,m_active_set] * wA
220  for (int32_t i=0; i < m_num_active; ++i)
221  {
222  // u += wA[i] * X[:,m_active_set[i]]
223  cblas_daxpy(n_vec, wA[i],
224  X.get_column_vector(m_active_set[i]), 1, &u[0], 1);
225  }
226
227  // step size
228  float64_t gamma = max_corr / AA;
229  if (m_num_active < n_fea)
230  {
231  for (int32_t i=0; i < n_fea; ++i)
232  {
233  if (m_is_active[i])
234  continue;
235
236  // correlation between X[:,i] and u
237  float64_t dir_corr = cblas_ddot(n_vec,
238  X.get_column_vector(i), 1, &u[0], 1);
239
240  float64_t tmp1 = (max_corr-corr[i])/(AA-dir_corr);
241  float64_t tmp2 = (max_corr+corr[i])/(AA+dir_corr);
242  if (tmp1 > CMath::MACHINE_EPSILON && tmp1 < gamma)
243  gamma = tmp1;
244  if (tmp2 > CMath::MACHINE_EPSILON && tmp2 < gamma)
245  gamma = tmp2;
246  }
247  }
248
249  int32_t i_kick=-1;
250  int32_t i_change=i_max_corr;
251  if (m_lasso)
252  {
253  // lasso modification to further refine gamma
254  lasso_cond = false;
256
257  for (int32_t i=0; i < m_num_active; ++i)
258  {
259  float64_t tmp = -beta[m_active_set[i]] / wA[i];
260  if (tmp > CMath::MACHINE_EPSILON && tmp < lasso_bound)
261  {
262  lasso_bound = tmp;
263  i_kick = i;
264  }
265  }
266
267  if (lasso_bound < gamma)
268  {
269  gamma = lasso_bound;
270  lasso_cond = true;
271  i_change = m_active_set[i_kick];
272  }
273  }
274
275  // update prediction: mu = mu + gamma * u
276  cblas_daxpy(n_vec, gamma, &u[0], 1, &mu[0], 1);
277
278  // update estimator
279  for (int32_t i=0; i < m_num_active; ++i)
280  beta[m_active_set[i]] += gamma * wA[i];
281
282  // early stopping on max l1-norm
283  if (m_max_l1_norm > 0)
284  {
285  float64_t l1 = SGVector<float64_t>::onenorm(&beta[0], n_fea);
286  if (l1 > m_max_l1_norm)
287  {
288  // stopping with interpolated beta
289  stop_cond = true;
290  lasso_cond = false;
291  float64_t l1_prev = SGVector<float64_t>::onenorm(&m_beta_path[nloop][0], n_fea);
292  float64_t s = (m_max_l1_norm-l1_prev)/(l1-l1_prev);
293
294  // beta = beta_prev + s*(beta-beta_prev)
295  // = (1-s)*beta_prev + s*beta
296  cblas_dscal(n_fea, s, &beta[0], 1);
297  cblas_daxpy(n_fea, 1-s, &m_beta_path[nloop][0], 1, &beta[0], 1);
298  }
299  }
300
301  // if lasso cond, drop the variable from active set
302  if (lasso_cond)
303  {
304  beta[i_change] = 0; // ensure it be zero
305
306  // update Cholesky factorization
307  R=cholesky_delete(R, i_kick);
308  deactivate_variable(i_kick);
309  }
310
311  nloop++;
312  m_beta_path.push_back(beta);
313  if (size_t(m_num_active) >= m_beta_idx.size())
314  m_beta_idx.push_back(nloop);
315  else
316  m_beta_idx[m_num_active] = nloop;
317
318  // early stopping with max number of non-zero variables
319  if (m_max_nonz > 0 && m_num_active >= m_max_nonz)
320  stop_cond = true;
321
322  } // main loop
323
324  // assign default estimator
325  w.vlen = n_fea;
326  switch_w(m_beta_idx.size()-1);
327
328  return true;
329 }
330
331 SGMatrix<float64_t> CLeastAngleRegression::cholesky_insert(
332  SGMatrix<float64_t> X, SGMatrix<float64_t> R, int32_t i_max_corr)
333 {
334  // diag_k = X[:,i_max_corr]' * X[:,i_max_corr]
335  float64_t diag_k = cblas_ddot(X.num_rows, X.get_column_vector(i_max_corr), 1,
336  X.get_column_vector(i_max_corr), 1);
337
338  if (m_num_active == 0)
339  { // R isn't allocated yet
340  SGMatrix<float64_t> nR(1,1);
341  nR(0,0) = CMath::sqrt(diag_k);
342  return nR;
343  }
344  else
345  {
346
347  // col_k is the k-th column of (X'X)
348  vector<float64_t> col_k(m_num_active);
349  for (int32_t i=0; i < m_num_active; ++i)
350  {
351  // col_k[i] = X[:,i_max_corr]' * X[:,m_active_set[i]]
352  col_k[i] = cblas_ddot(X.num_rows, X.get_column_vector(i_max_corr), 1,
353  X.get_column_vector(m_active_set[i]), 1);
354  }
355
356  // R' * R_k = (X' * X)_k = col_k, solving to get R_k
357  vector<float64_t> R_k(col_k);
358  cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, m_num_active, 1,
359  1, R.matrix, m_num_active, &R_k[0], m_num_active);
360
361  float64_t R_kk = CMath::sqrt(diag_k -
362  cblas_ddot(m_num_active, &R_k[0], 1, &R_k[0], 1));
363
364  // new_R = [R R_k; zeros(...) R_kk]
365  SGMatrix<float64_t> nR(m_num_active+1, m_num_active+1);
366  for (int32_t i=0; i < m_num_active; ++i)
367  for (int32_t j=0; j < m_num_active; ++j)
368  nR(i,j) = R(i,j);
369  for (int32_t i=0; i < m_num_active; ++i)
370  nR(i, m_num_active) = R_k[i];
371  for (int32_t i=0; i < m_num_active; ++i)
372  nR(m_num_active, i) = 0;
373  nR(m_num_active, m_num_active) = R_kk;
374
375  return nR;
376  }
377
378 }
379
380 SGMatrix<float64_t> CLeastAngleRegression::cholesky_delete(SGMatrix<float64_t> R, int32_t i_kick)
381 {
382  if (i_kick != m_num_active-1)
383  {
384  // remove i_kick-th column
385  for (int32_t j=i_kick; j < m_num_active-1; ++j)
386  for (int32_t i=0; i < m_num_active; ++i)
387  R(i,j) = R(i,j+1);
388
389  SGMatrix<float64_t> G(2,2);
390  for (int32_t i=i_kick; i < m_num_active-1; ++i)
391  {
392  plane_rot(R(i,i),R(i+1,i), R(i,i), R(i+1,i), G);
393  if (i < m_num_active-2)
394  {
395  for (int32_t k=i+1; k < m_num_active-1; ++k)
396  {
397  // R[i:i+1, k] = G*R[i:i+1, k]
398  float64_t Rik = R(i,k), Ri1k = R(i+1,k);
399  R(i,k) = G(0,0)*Rik + G(0,1)*Ri1k;
400  R(i+1,k) = G(1,0)*Rik+G(1,1)*Ri1k;
401  }
402  }
403  }
404  }
405
406  SGMatrix<float64_t> nR(m_num_active-1, m_num_active-1);
407  for (int32_t i=0; i < m_num_active-1; ++i)
408  for (int32_t j=0; j < m_num_active-1; ++j)
409  nR(i,j) = R(i,j);
410
411  return nR;
412 }
413
414 #endif // HAVE_LAPACK
static const float64_t MACHINE_EPSILON
Definition: Math.h:2058
virtual ELabelType get_label_type() const =0
Real Labels are real-valued labels.
int32_t get_num_features() const
static vector< float64_t > make_vector(int32_t size, float64_t val)
virtual int32_t get_num_labels() const =0
real valued labels (e.g. for regression, classifier outputs)
Definition: LabelTypes.h:22
SGMatrix< ST > get_feature_matrix()
virtual int32_t get_num_vectors() const =0
CLabels * m_labels
Definition: Machine.h:361
#define SG_ERROR(...)
Definition: SGIO.h:129
static void find_max_abs(const vector< float64_t > &vec, const vector< bool > &ignore_mask, int32_t &imax, float64_t &vmax)
index_t num_rows
Definition: SGMatrix.h:376
virtual bool train_machine(CFeatures *data=NULL)
index_t vlen
Definition: SGVector.h:494
void switch_w(int32_t num_variable)
static void plane_rot(float64_t x0, float64_t x1, float64_t &y0, float64_t &y1, SGMatrix< float64_t > &G)
virtual int32_t get_num_vectors() const
double float64_t
Definition: common.h:50
SGVector< float64_t > w
virtual EFeatureClass get_feature_class() const =0
T * get_column_vector(index_t col) const
Definition: SGMatrix.h:115
Class LinearMachine is a generic interface for all kinds of linear machines like classifiers.
Definition: LinearMachine.h:63
CDotFeatures * features
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
static T sign(T a)
Definition: Math.h:426
The class Features is the base class of all feature objects.
Definition: Features.h:68
static T min(T a, T b)
Definition: Math.h:157
static float64_t onenorm(T *x, int32_t len)
|| x ||_1
Definition: SGVector.cpp:715
Matrix::Scalar max(Matrix m)
Definition: Redux.h:66