SHOGUN  4.1.0
EPInferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 *
 * Based on ideas from GAUSSIAN PROCESS REGRESSION AND CLASSIFICATION Toolbox
 * Copyright (C) 2005-2013 by Carl Edward Rasmussen & Hannes Nickisch under the
 * FreeBSD License
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 */

#include <shogun/machine/gp/EPInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/mathematics/eigen3.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;
using namespace Eigen;

// try to use previously allocated memory for SGVector
#define CREATE_SGVECTOR(vec, len, sg_type) \
	{ \
		if (!vec.vector || vec.vlen!=len) \
			vec=SGVector<sg_type>(len); \
	}

// try to use previously allocated memory for SGMatrix
#define CREATE_SGMATRIX(mat, rows, cols, sg_type) \
	{ \
		if (!mat.matrix || mat.num_rows!=rows || mat.num_cols!=cols) \
			mat=SGMatrix<sg_type>(rows, cols); \
	}

CEPInferenceMethod::CEPInferenceMethod()
{
	init();
}

CEPInferenceMethod::CEPInferenceMethod(CKernel* kernel, CFeatures* features,
		CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
		: CInferenceMethod(kernel, features, mean, labels, model)
{
	init();
}

CEPInferenceMethod::~CEPInferenceMethod()
{
}

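// EP termination settings (used by update() below): sweeps over all sites are
// repeated at least m_min_sweep and at most m_max_sweep times, and iteration
// stops once the change of the negative log marginal likelihood between two
// consecutive sweeps drops below m_tol.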
void CEPInferenceMethod::init()
{
	m_max_sweep=15;
	m_min_sweep=2;
	m_tol=1e-4;
}

CEPInferenceMethod* CEPInferenceMethod::obtain_from_generic(
		CInferenceMethod* inference)
{
	if (inference==NULL)
		return NULL;

	if (inference->get_inference_type()!=INF_EP)
		SG_SERROR("Provided inference is not of type CEPInferenceMethod!\n")

	SG_REF(inference);
	return (CEPInferenceMethod*)inference;
}

float64_t CEPInferenceMethod::get_negative_log_marginal_likelihood()
{
	if (parameter_hash_changed())
		update();

	return m_nlZ;
}

SGVector<float64_t> CEPInferenceMethod::get_alpha()
{
	if (parameter_hash_changed())
		update();

	return SGVector<float64_t>(m_alpha);
}

SGMatrix<float64_t> CEPInferenceMethod::get_cholesky()
{
	if (parameter_hash_changed())
		update();

	return SGMatrix<float64_t>(m_L);
}

SGVector<float64_t> CEPInferenceMethod::get_diagonal_vector()
{
	if (parameter_hash_changed())
		update();

	return SGVector<float64_t>(m_sttau);
}

SGVector<float64_t> CEPInferenceMethod::get_posterior_mean()
{
	compute_gradient();

	return SGVector<float64_t>(m_mu);
}

SGMatrix<float64_t> CEPInferenceMethod::get_posterior_covariance()
{
	compute_gradient();

	return SGMatrix<float64_t>(m_Sigma);
}

void CEPInferenceMethod::compute_gradient()
{
	CInferenceMethod::compute_gradient();

	if (!m_gradient_update)
	{
		// update matrices to compute derivatives
		update_deriv();
		m_gradient_update=true;
		update_parameter_hash();
	}
}

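// EP approximates the non-Gaussian posterior by a Gaussian q(f)=N(mu, Sigma)
// built from local site parameters: site precisions ttau and natural
// (precision times mean) parameters tnu. One sweep visits every training
// point in random order, forms the cavity distribution (the posterior with
// site i removed, parameters tau_n/nu_n), matches the first two moments of
// cavity*likelihood via the likelihood model, and folds the updated site back
// into mu and Sigma with a rank-1 update. Sweeps repeat until the negative
// log marginal likelihood stabilises (see m_tol/m_min_sweep/m_max_sweep).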
void CEPInferenceMethod::update()
{
	SG_DEBUG("entering\n");

	// update kernel and feature matrix
	CInferenceMethod::update();

	// get number of labels (training examples)
	index_t n=m_labels->get_num_labels();

	// try to use tilde values from previous call
	if (m_ttau.vlen==n)
	{
		update_chol();
		update_approx_cov();
		update_approx_mean();
		update_negative_ml();
	}

	// get mean vector
	SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);

	// get and scale diagonal of the kernel matrix
	SGVector<float64_t> ktrtr_diag=m_ktrtr.get_diagonal_vector();
	ktrtr_diag.scale(CMath::sq(m_scale));

	// marginal likelihood for ttau = tnu = 0
	float64_t nlZ0=-SGVector<float64_t>::sum(m_model->get_log_zeroth_moments(
			mean, ktrtr_diag, m_labels));

	// use zero values if we have no better guess or it's better
	if (m_ttau.vlen!=n || m_nlZ>nlZ0)
	{
		CREATE_SGVECTOR(m_ttau, n, float64_t);
		m_ttau.zero();

		CREATE_SGVECTOR(m_sttau, n, float64_t);
		m_sttau.zero();

		CREATE_SGVECTOR(m_tnu, n, float64_t);
		m_tnu.zero();

		CREATE_SGMATRIX(m_Sigma, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t);

		// copy data manually, since we don't have appropriate method
		for (index_t i=0; i<m_ktrtr.num_rows; i++)
			for (index_t j=0; j<m_ktrtr.num_cols; j++)
				m_Sigma(i,j)=m_ktrtr(i,j)*CMath::sq(m_scale);

		CREATE_SGVECTOR(m_mu, n, float64_t);
		m_mu.zero();

		// set marginal likelihood
		m_nlZ=nlZ0;
	}

	// create vector of the random permutation
	SGVector<index_t> v(n);
	v.range_fill();

	// cavity tau and nu vectors
	SGVector<float64_t> tau_n(n);
	SGVector<float64_t> nu_n(n);

	// cavity mu and s2 vectors
	SGVector<float64_t> mu_n(n);
	SGVector<float64_t> s2_n(n);

	float64_t nlZ_old=CMath::INFTY;
	uint32_t sweep=0;

	while ((CMath::abs(m_nlZ-nlZ_old)>m_tol && sweep<m_max_sweep) ||
			sweep<m_min_sweep)
	{
		nlZ_old=m_nlZ;
		sweep++;

		// shuffle random permutation
		CMath::permute(v);

		for (index_t j=0; j<n; j++)
		{
			index_t i=v[j];

			// find cavity parameters
			tau_n[i]=1.0/m_Sigma(i,i)-m_ttau[i];
			nu_n[i]=m_mu[i]/m_Sigma(i,i)+mean[i]*tau_n[i]-m_tnu[i];

			// compute cavity mean and variance
			mu_n[i]=nu_n[i]/tau_n[i];
			s2_n[i]=1.0/tau_n[i];

			// get moments
			float64_t mu=m_model->get_first_moment(mu_n, s2_n, m_labels, i);
			float64_t s2=m_model->get_second_moment(mu_n, s2_n, m_labels, i);

			// save old value of ttau
			float64_t ttau_old=m_ttau[i];

			// compute ttau and sqrt(ttau)
			m_ttau[i]=CMath::max(1.0/s2-tau_n[i], 0.0);
			m_sttau[i]=CMath::sqrt(m_ttau[i]);

			// compute tnu
			m_tnu[i]=mu/s2-nu_n[i];

			// compute difference ds2=ttau_new-ttau_old
			float64_t ds2=m_ttau[i]-ttau_old;

			// create eigen representation of Sigma, tnu and mu
			Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
					m_Sigma.num_cols);
			Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
			Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);

			VectorXd eigen_si=eigen_Sigma.col(i);

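			// Changing the site precision ttau[i] by ds2 modifies the inverse
			// posterior covariance by ds2*e_i*e_i^T, so by the Sherman-Morrison
			// formula the covariance itself changes by
			// -ds2/(1+ds2*Sigma(i,i)) * Sigma(:,i)*Sigma(:,i)^T, which is the
			// rank-1 update applied below; mu is then refreshed as Sigma*tnu.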
			// rank-1 update Sigma
			eigen_Sigma=eigen_Sigma-ds2/(1.0+ds2*eigen_si(i))*eigen_si*
				eigen_si.adjoint();

			// update mu
			eigen_mu=eigen_Sigma*eigen_tnu;
		}

		// update upper triangular factor (L^T) of Cholesky decomposition of
		// matrix B, approximate posterior covariance and mean, negative
		// marginal likelihood
		update_chol();
		update_approx_cov();
		update_approx_mean();
		update_negative_ml();
	}

	if (sweep==m_max_sweep && CMath::abs(m_nlZ-nlZ_old)>m_tol)
	{
		SG_ERROR("Maximum number (%d) of sweeps reached, but tolerance (%f) was "
				"not yet reached. You can manually set maximum number of sweeps "
				"or tolerance to fix this problem.\n", m_max_sweep, m_tol);
	}

	// update vector alpha
	update_alpha();

	m_gradient_update=false;

	// update hash of the parameters
	update_parameter_hash();

	SG_DEBUG("leaving\n");
}

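// The vector alpha is the quantity the rest of the GP machinery consumes for
// prediction (predictive means take the form of the mean function plus
// k(x*,X)*alpha, as in the GPML toolbox); it is recomputed once the site
// parameters have converged.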
void CEPInferenceMethod::update_alpha()
{
	// create eigen representations of kernel matrix, L^T, sqrt(ttau) and tnu
	Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
	Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
	Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
	Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

	// create shogun and eigen representation of the alpha vector
	CREATE_SGVECTOR(m_alpha, m_tnu.vlen, float64_t);
	Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

	// solve LL^T * v = tS^(1/2) * K * tnu
	VectorXd eigen_v=eigen_L.triangularView<Upper>().adjoint().solve(
			eigen_sttau.cwiseProduct(eigen_K*CMath::sq(m_scale)*eigen_tnu));
	eigen_v=eigen_L.triangularView<Upper>().solve(eigen_v);

	// compute alpha = (I - tS^(1/2) * B^(-1) * tS^(1/2) * K) * tnu =
	// tnu - tS^(1/2) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K * tnu =
	// tnu - tS^(1/2) * v
	eigen_alpha=eigen_tnu-eigen_sttau.cwiseProduct(eigen_v);
}

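// B = I + tS^(1/2)*K*tS^(1/2) is symmetric positive definite with eigenvalues
// bounded below by 1 (the second term is positive semi-definite), so its
// Cholesky factor can be computed stably and is reused for all solves with B.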
void CEPInferenceMethod::update_chol()
{
	// create eigen representations of kernel matrix and sqrt(ttau)
	Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
	Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);

	// create shogun and eigen representation of the upper triangular factor
	// (L^T) of the Cholesky decomposition of the matrix B
	CREATE_SGMATRIX(m_L, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t);
	Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

	// compute upper triangular factor L^T of the Cholesky decomposition of the
	// matrix: B = tS^(1/2) * K * tS^(1/2) + I
	LLT<MatrixXd> eigen_chol((eigen_sttau*eigen_sttau.adjoint()).cwiseProduct(
			eigen_K*CMath::sq(m_scale))+
			MatrixXd::Identity(m_L.num_rows, m_L.num_cols));

	eigen_L=eigen_chol.matrixU();
}

void CEPInferenceMethod::update_approx_cov()
{
	// create eigen representations of kernel matrix, L^T matrix and sqrt(ttau)
	Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
	Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
	Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);

	// create shogun and eigen representation of the approximate covariance
	// matrix
	CREATE_SGMATRIX(m_Sigma, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t);
	Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);

	// compute V = L^(-1) * tS^(1/2) * K, using upper triangular factor L^T
	MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
			eigen_sttau.asDiagonal()*eigen_K*CMath::sq(m_scale));

	// compute covariance matrix of the posterior:
	// Sigma = K - K * tS^(1/2) * (L * L^T)^(-1) * tS^(1/2) * K =
	// K - (K * tS^(1/2)) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K =
	// K - (tS^(1/2) * K)^T * (L^(-1))^T * L^(-1) * tS^(1/2) * K = K - V^T * V
	eigen_Sigma=eigen_K*CMath::sq(m_scale)-eigen_V.adjoint()*eigen_V;
}

void CEPInferenceMethod::update_approx_mean()
{
	// create eigen representation of posterior covariance matrix and tnu
	Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
	Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);

	// create shogun and eigen representation of the approximate mean vector
	CREATE_SGVECTOR(m_mu, m_tnu.vlen, float64_t);
	Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);

	// compute mean vector of the approximate posterior: mu = Sigma * tnu
	eigen_mu=eigen_Sigma*eigen_tnu;
}

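// The EP approximation of the negative log marginal likelihood combines the
// log-determinant of B (via the diagonal of its Cholesky factor L^T), the log
// zeroth moments of the sites and terms coupling the site and cavity
// parameters; it is assembled below from three parts, following the GPML
// toolbox implementation this file is based on.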
void CEPInferenceMethod::update_negative_ml()
{
	// create eigen representations of Sigma, L, mu, tnu, ttau
	Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
	Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
	Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
	Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
	Map<VectorXd> eigen_ttau(m_ttau.vector, m_ttau.vlen);

	// get and create eigen representation of the mean vector
	SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
	Map<VectorXd> eigen_m(m.vector, m.vlen);

	// compute vector of cavity parameter tau_n
	VectorXd eigen_tau_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient(
			eigen_Sigma.diagonal())-eigen_ttau;

	// compute vector of cavity parameter nu_n
	VectorXd eigen_nu_n=eigen_mu.cwiseQuotient(eigen_Sigma.diagonal())-
		eigen_tnu+eigen_m.cwiseProduct(eigen_tau_n);

	// compute cavity mean: mu_n=nu_n/tau_n
	SGVector<float64_t> mu_n(m_ttau.vlen);
	Map<VectorXd> eigen_mu_n(mu_n.vector, mu_n.vlen);

	eigen_mu_n=eigen_nu_n.cwiseQuotient(eigen_tau_n);

	// compute cavity variance: s2_n=1.0/tau_n
	SGVector<float64_t> s2_n(m_ttau.vlen);
	Map<VectorXd> eigen_s2_n(s2_n.vector, s2_n.vlen);

	eigen_s2_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient(eigen_tau_n);

	float64_t lZ=SGVector<float64_t>::sum(
			m_model->get_log_zeroth_moments(mu_n, s2_n, m_labels));

	// compute nlZ_part1=sum(log(diag(L)))-sum(lZ)-tnu'*Sigma*tnu/2
	float64_t nlZ_part1=eigen_L.diagonal().array().log().sum()-lZ-
		(eigen_tnu.adjoint()*eigen_Sigma).dot(eigen_tnu)/2.0;

	// compute nlZ_part2=sum(tnu.^2./(tau_n+ttau))/2-sum(log(1+ttau./tau_n))/2
	float64_t nlZ_part2=(eigen_tnu.array().square()/
		(eigen_tau_n+eigen_ttau).array()).sum()/2.0-(1.0+eigen_ttau.array()/
		eigen_tau_n.array()).log().sum()/2.0;

	// compute nlZ_part3=-(nu_n-m.*tau_n)'*((ttau./tau_n.*(nu_n-m.*tau_n)-2*tnu)
	// ./(ttau+tau_n))/2
	float64_t nlZ_part3=-(eigen_nu_n-eigen_m.cwiseProduct(eigen_tau_n)).dot(
		((eigen_ttau.array()/eigen_tau_n.array()*(eigen_nu_n.array()-
		eigen_m.array()*eigen_tau_n.array())-2*eigen_tnu.array())/
		(eigen_ttau.array()+eigen_tau_n.array())).matrix())/2.0;

	// compute nlZ=nlZ_part1+nlZ_part2+nlZ_part3
	m_nlZ=nlZ_part1+nlZ_part2+nlZ_part3;
}

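// F collects everything needed for hyperparameter derivatives:
// F = alpha*alpha^T - tS^(1/2)*B^(-1)*tS^(1/2), so that the derivative of the
// negative log marginal likelihood wrt a kernel hyperparameter is
// -sum(F.*dK)/2 with the kernel scaling applied (see the derivative getters
// below).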
void CEPInferenceMethod::update_deriv()
{
	// create eigen representations of L, sttau and alpha
	Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
	Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
	Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

	// create shogun and eigen representation of F
	CREATE_SGMATRIX(m_F, m_L.num_rows, m_L.num_cols, float64_t);
	Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);

	// solve L*L^T * V = diag(sqrt(ttau))
	MatrixXd V=eigen_L.triangularView<Upper>().adjoint().solve(
			MatrixXd(eigen_sttau.asDiagonal()));
	V=eigen_L.triangularView<Upper>().solve(V);

	// compute F=alpha*alpha'-repmat(sW,1,n).*solve_chol(L,diag(sW))
	eigen_F=eigen_alpha*eigen_alpha.adjoint()-eigen_sttau.asDiagonal()*V;
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_inference_method(
		const TParameter* param)
{
	REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
			"the negative log marginal likelihood wrt %s.%s parameter\n",
			get_name(), param->m_name)

	// create eigen representations of the kernel matrix and the matrix F
	Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
	Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);

	SGVector<float64_t> result(1);

	// compute derivative wrt kernel scale: dnlZ=-sum(F.*K*scale*2)/2
	result[0]=-(eigen_F.cwiseProduct(eigen_K)*m_scale*2.0).sum()/2.0;

	return result;
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_likelihood_model(
		const TParameter* param)
{
	SG_NOTIMPLEMENTED
	return SGVector<float64_t>();
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_kernel(
		const TParameter* param)
{
	// create eigen representation of the matrix F
	Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);

	REQUIRE(param, "Param not set\n");
	SGVector<float64_t> result;
	int64_t len=const_cast<TParameter *>(param)->m_datatype.get_num_elements();
	result=SGVector<float64_t>(len);

	for (index_t i=0; i<result.vlen; i++)
	{
		SGMatrix<float64_t> dK;

		if (result.vlen==1)
			dK=m_kernel->get_parameter_gradient(param);
		else
			dK=m_kernel->get_parameter_gradient(param, i);

		Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);

		// compute derivative wrt kernel parameter: dnlZ=-sum(F.*dK*scale^2)/2.0
		result[i]=-(eigen_F.cwiseProduct(eigen_dK)*CMath::sq(m_scale)).sum()/2.0;
	}

	return result;
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_mean(
		const TParameter* param)
{
	SG_NOTIMPLEMENTED
	return SGVector<float64_t>();
}

#endif /* HAVE_EIGEN3 */