SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
FITCInferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (W) 2015 Wu Lin
4  * Written (W) 2013 Roman Votyakov
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  *
31  */
32 
38 
39 using namespace shogun;
40 using namespace Eigen;
41 
43 {
44  init();
45 }
46 
48  CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod, CFeatures* lat)
49  : CSingleFITCInference(kern, feat, m, lab, mod, lat)
50 {
51  init();
52 }
53 
54 void CFITCInferenceMethod::init()
55 {
56 }
57 
59 {
60 }
62 {
64 
65  if (!m_gradient_update)
66  {
67  update_deriv();
68  m_gradient_update=true;
70  }
71 }
72 
74 {
75  SG_DEBUG("entering\n");
76 
78  update_chol();
79  update_alpha();
80  m_gradient_update=false;
82 
83  SG_DEBUG("leaving\n");
84 }
85 
87  CInference* inference)
88 {
89  if (inference==NULL)
90  return NULL;
91 
92  if (inference->get_inference_type()!=INF_FITC_REGRESSION)
93  SG_SERROR("Provided inference is not of type CFITCInferenceMethod!\n")
94 
95  SG_REF(inference);
96  return (CFITCInferenceMethod*)inference;
97 }
98 
100 {
102 
104  "FITC inference method can only use Gaussian likelihood function\n")
105  REQUIRE(m_labels->get_label_type()==LT_REGRESSION, "Labels must be type "
106  "of CRegressionLabels\n")
107 }
108 
110 {
112  update();
113 
114  // get the sigma variable from the Gaussian likelihood model
116  float64_t sigma=lik->get_sigma();
117  SG_UNREF(lik);
118 
119  // compute diagonal vector: sW=1/sigma
121  result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma);
122 
123  return result;
124 }
125 
127 {
129  update();
130 
131  //time complexity of the following operations is O(m*n)
132 
133  // create eigen representations of chol_utr, dg, r, be
136  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
137  Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
138  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
139 
140  // compute negative log marginal likelihood:
141  // nlZ=sum(log(diag(utr)))+(sum(log(dg))+r'*r-be'*be+n*log(2*pi))/2
142  float64_t result=eigen_chol_utr.diagonal().array().log().sum()+
143  (-eigen_t.array().log().sum()+eigen_r.dot(eigen_r)-eigen_be.dot(eigen_be)+
145 
146  return result;
147 }
148 
150 {
151  //time complexits O(m^2*n)
152 
153  // get the sigma variable from the Gaussian likelihood model
155  float64_t sigma=lik->get_sigma();
156  SG_UNREF(lik);
157 
158  // eigen3 representation of covariance matrix of inducing features (m_kuu)
159  // and training features (m_ktru)
162 
164 
165  // solve Luu' * Luu = Kuu + m_ind_noise * I
166  //Luu = chol(Kuu+snu2*eye(nu)); % Kuu + snu2*I = Luu'*Luu
167  LLT<MatrixXd> Luu(eigen_kuu*CMath::exp(m_log_scale*2.0)+CMath::exp(m_log_ind_noise)*MatrixXd::Identity(
169 
170  // create shogun and eigen3 representation of cholesky of covariance of
171  // inducing features Luu (m_chol_uu and eigen_chol_uu)
172  m_chol_uu=SGMatrix<float64_t>(Luu.rows(), Luu.cols());
175  eigen_chol_uu=Luu.matrixU();
176 
177  // solve Luu' * V = Ktru
178  //V = Luu'\Ku; % V = inv(Luu')*Ku => V'*V = Q
179 
182  V=eigen_chol_uu.triangularView<Upper>().adjoint().solve(eigen_ktru*
183  CMath::exp(m_log_scale*2.0));
184 
185  // create shogun and eigen3 representation of
186  // dg = diag(K) + sn2 - diag(Q)
187  // t = 1/dg
189  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
190 
191  //g_sn2 = diagK + sn2 - sum(V.*V,1)'; % g + sn2 = diag(K) + sn2 - diag(Q)
192  eigen_t=eigen_ktrtr_diag*CMath::exp(m_log_scale*2.0)+CMath::sq(sigma)*
193  VectorXd::Ones(m_t.vlen)-(V.cwiseProduct(V)).colwise().sum().adjoint();
194  eigen_t=MatrixXd::Ones(eigen_t.rows(),1).cwiseQuotient(eigen_t);
195 
196  // solve Lu' * Lu = V * diag(1/dg) * V' + I
197  //Lu = chol(eye(nu) + (V./repmat(g_sn2',nu,1))*V'); % Lu'*Lu=I+V*diag(1/g_sn2)*V'
198  LLT<MatrixXd> Lu(V*((VectorXd::Ones(m_t.vlen)).cwiseProduct(eigen_t)).asDiagonal()*
199  V.adjoint()+MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
200 
201  // create shogun and eigen3 representation of cholesky of covariance of
202  // training features Luu (m_chol_utr and eigen_chol_utr)
203  m_chol_utr=SGMatrix<float64_t>(Lu.rows(), Lu.cols());
204  Map<MatrixXd> eigen_chol_utr(m_chol_utr.matrix, m_chol_utr.num_rows,
205  m_chol_utr.num_cols);
206  eigen_chol_utr=Lu.matrixU();
207 
208  // create eigen representation of labels and mean vectors
209  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
210  Map<VectorXd> eigen_y(y.vector, y.vlen);
212  Map<VectorXd> eigen_m(m.vector, m.vlen);
213 
214  // compute sgrt_dg = sqrt(dg)
215  VectorXd sqrt_t=eigen_t.array().sqrt();
216 
217  // create shogun and eigen3 representation of labels adjusted for
218  // noise and means (m_r)
219  //r = (y-m)./sqrt(g_sn2);
221  Map<VectorXd> eigen_r(m_r.vector, m_r.vlen);
222  eigen_r=(eigen_y-eigen_m).cwiseProduct(sqrt_t);
223 
224  // compute be
225  //be = Lu'\(V*(r./sqrt(g_sn2)));
226  m_be=SGVector<float64_t>(m_chol_utr.num_cols);
227  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
228  eigen_be=eigen_chol_utr.triangularView<Upper>().adjoint().solve(
229  V*eigen_r.cwiseProduct(sqrt_t));
230 
231  // compute iKuu
232  //iKuu = solve_chol(Luu,eye(nu));
233  MatrixXd iKuu=Luu.solve(MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
234 
235  // create shogun and eigen3 representation of posterior cholesky
236  MatrixXd eigen_prod=eigen_chol_utr*eigen_chol_uu;
239 
240  //post.L = solve_chol(Lu*Luu,eye(nu)) - iKuu;
241  eigen_chol=eigen_prod.triangularView<Upper>().adjoint().solve(
242  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
243  eigen_chol=eigen_prod.triangularView<Upper>().solve(eigen_chol)-iKuu;
244 }
245 
247 {
248  //time complexity O(m^2) since triangular.solve is O(m^2)
253  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
254 
255  // create shogun and eigen representations of alpha
256  // and solve Luu * Lu * alpha = be
258  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
259 
260  //post.alpha = Luu\(Lu\be);
261  eigen_alpha=eigen_chol_utr.triangularView<Upper>().solve(eigen_be);
262  eigen_alpha=eigen_chol_uu.triangularView<Upper>().solve(eigen_alpha);
263 }
264 
266 {
267  //time complexits O(m^2*n)
268 
269  // create eigen representation of Ktru, Lu, Luu, dg, be
275  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
276  Map<VectorXd> eigen_be(m_be.vector, m_be.vlen);
277 
278  // get and create eigen representation of labels
279  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
280  Map<VectorXd> eigen_y(y.vector, y.vlen);
281 
282  // get and create eigen representation of mean vector
284  Map<VectorXd> eigen_m(m.vector, m.vlen);
285 
286  // compute V=inv(Luu')*Ku
288 
289  // create shogun and eigen representation of al
291  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
292 
293  // compute al=(Kt+sn2*eye(n))\y
294  //r = (y-m)./sqrt(g_sn2);
295  //al = r./sqrt(g_sn2) - (V'*(Lu\be))./g_sn2; % al = (Kt+sn2*eye(n))\(y-m)
296  eigen_al=((eigen_y-eigen_m)-(V.adjoint()*
297  eigen_Lu.triangularView<Upper>().solve(eigen_be))).cwiseProduct(eigen_t);
298 
299  // compute inv(Kuu+snu2*I)=iKuu
300  MatrixXd iKuu=eigen_Luu.triangularView<Upper>().adjoint().solve(
301  MatrixXd::Identity(m_kuu.num_rows, m_kuu.num_cols));
302  iKuu=eigen_Luu.triangularView<Upper>().solve(iKuu);
303 
304  // create shogun and eigen representation of B
305  m_B=SGMatrix<float64_t>(iKuu.rows(), eigen_Ktru.cols());
307 
308  //B = iKuu*Ku; w = B*al;
309  eigen_B=iKuu*eigen_Ktru*CMath::exp(m_log_scale*2.0);
310 
311  // create shogun and eigen representation of w
313  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
314 
315  eigen_w=eigen_B*eigen_al;
316 
317  // create shogun and eigen representation of W
320 
321  // compute W=Lu'\(V./repmat(g_sn2',nu,1))
322  //W = Lu'\(V./repmat(g_sn2',nu,1));
323  eigen_W=eigen_Lu.triangularView<Upper>().adjoint().solve(V*VectorXd::Ones(
324  m_t.vlen).cwiseProduct(eigen_t).asDiagonal());
325 }
326 
327 
329 {
331 
333  Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
334 
335  /*
336  //true posterior mean with equivalent FITC prior
337  //time complexity of the following operations is O(n)
338  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
339  SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
340  Map<VectorXd> eigen_y(y.vector, y.vlen);
341  SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
342  Map<VectorXd> eigen_m(m.vector, m.vlen);
343  CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
344  float64_t sigma=lik->get_sigma();
345  SG_UNREF(lik);
346  eigen_mu=(eigen_y-eigen_m)-eigen_al*CMath::sq(sigma);
347  */
348 
349  //FITC approximated posterior mean
350  Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
352  //time complexity of the following operation is O(m*n)
353  eigen_mu=CMath::exp(m_log_scale*2.0)*eigen_Ktru.adjoint()*eigen_alpha;
354 
355  return SGVector<float64_t>(m_mu);
356 }
357 
359 {
361 
362  //time complexity of the following operations is O(m*n^2)
363  //Warning: the the time complexity increases from O(m^2*n) to O(n^2*m) if this method is called
366  m_Sigma.num_cols);
370 
371  /*
372  //true posterior mean with equivalent FITC prior
373  CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
374  float64_t sigma=lik->get_sigma();
375  SG_UNREF(lik);
376  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
377  VectorXd diag_part=CMath::sq(sigma)*eigen_t;
378  // diag(sigma2/dg)*V'*(Lu\eye(n))
379  MatrixXd part1=diag_part.asDiagonal()*eigen_V.adjoint()*
380  (eigen_Lu.triangularView<Upper>().solve(MatrixXd::Identity(
381  m_kuu.num_rows, m_kuu.num_cols)));
382  eigen_Sigma=part1*part1.adjoint();
383  VectorXd part2=(VectorXd::Ones(m_t.vlen)-diag_part)*CMath::sq(sigma);
384  eigen_Sigma+=part2.asDiagonal();
385  */
386 
387  //FITC approximated posterior covariance
388  //TO DO we only need tha diagonal elements of m_ktrtr
390  MatrixXd part1=eigen_V.adjoint()*(eigen_Lu.triangularView<Upper>().solve(MatrixXd::Identity(
392  eigen_Sigma=part1*part1.adjoint();
393  VectorXd part2=eigen_ktrtr_diag*CMath::exp(m_log_scale*2.0)-(
394  eigen_V.cwiseProduct(eigen_V)).colwise().sum().adjoint();
395  eigen_Sigma+=part2.asDiagonal();
396 
398 }
399 
401  const TParameter* param)
402 {
403  //time complexity O(m*n)
404  REQUIRE(!strcmp(param->m_name, "log_sigma"), "Can't compute derivative of "
405  "the nagative log marginal likelihood wrt %s.%s parameter\n",
406  m_model->get_name(), param->m_name)
407 
408  // create eigen representation of dg, al, w, W and B
409  Map<VectorXd> eigen_t(m_t.vector, m_t.vlen);
410  Map<VectorXd> eigen_al(m_al.vector, m_al.vlen);
411  Map<VectorXd> eigen_w(m_w.vector, m_w.vlen);
414 
415  // get the sigma variable from the Gaussian likelihood model
417  float64_t sigma=lik->get_sigma();
418  SG_UNREF(lik);
419 
420  SGVector<float64_t> result(1);
421 
422  //diag_dK = 1./g_sn2 - sum(W.*W,1)' - al.*al; % diag(dnlZ/dK)
423  //dnlZ.lik = sn2*sum(diag_dK) without noise term
424  result[0]=CMath::sq(sigma)*(VectorXd::Ones(m_t.vlen).cwiseProduct(
425  eigen_t).sum()-eigen_W.cwiseProduct(eigen_W).sum()-eigen_al.dot(eigen_al));
426 
427  return result;
428 }
429 
431 {
432  SG_WARNING("The method does not require a minimizer. The provided minimizer will not be used.\n");
433 }
virtual const char * get_name() const =0
float64_t m_log_scale
Definition: Inference.h:490
virtual void update()
Definition: Inference.cpp:314
static void fill_vector(T *vec, int32_t len, T value)
Definition: SGVector.cpp:221
virtual ELabelType get_label_type() const =0
virtual void update_parameter_hash()
Definition: SGObject.cpp:248
Class that models Gaussian likelihood.
Real Labels are real-valued labels.
virtual SGVector< float64_t > get_diagonal_vector()
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual EInferenceType get_inference_type() const
Definition: Inference.h:104
SGMatrix< float64_t > m_chol_uu
real valued labels (e.g. for regression, classifier outputs)
Definition: LabelTypes.h:22
SGVector< float64_t > m_mu
static T sq(T x)
Definition: Math.h:450
Definition: SGMatrix.h:20
virtual ELikelihoodModelType get_model_type() const
parameter struct
virtual int32_t get_num_vectors() const =0
#define REQUIRE(x,...)
Definition: SGIO.h:206
virtual SGVector< float64_t > get_posterior_mean()
index_t num_cols
Definition: SGMatrix.h:376
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
An abstract class of the mean function.
Definition: MeanFunction.h:49
#define SG_REF(x)
Definition: SGObject.h:51
virtual void check_members() const
index_t num_rows
Definition: SGMatrix.h:374
SGMatrix< float64_t > m_chol_utr
virtual void register_minimizer(Minimizer *minimizer)
CFeatures * m_features
Definition: Inference.h:478
virtual float64_t get_negative_log_marginal_likelihood()
static CFITCInferenceMethod * obtain_from_generic(CInference *inference)
CMeanFunction * m_mean
Definition: Inference.h:472
virtual void check_members() const
index_t vlen
Definition: SGVector.h:492
CLabels * m_labels
Definition: Inference.h:481
double float64_t
Definition: common.h:50
virtual void compute_gradient()
Definition: Inference.cpp:341
SGMatrix< float64_t > m_Sigma
SGMatrix< float64_t > m_kuu
SGMatrix< float64_t > m_L
Definition: Inference.h:487
SGVector< float64_t > m_ktrtr_diag
static CGaussianLikelihood * obtain_from_generic(CLikelihoodModel *lik)
#define SG_UNREF(x)
Definition: SGObject.h:52
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The Inference Method base class.
Definition: Inference.h:81
The class Features is the base class of all feature objects.
Definition: Features.h:68
#define SG_SERROR(...)
Definition: SGIO.h:179
The Fully Independent Conditional Training inference method class.
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)
static float64_t exp(float64_t x)
Definition: Math.h:621
static float64_t log(float64_t v)
Definition: Math.h:922
The Kernel base class.
Definition: Kernel.h:159
SGMatrix< float64_t > m_ktru
The minimizer base class.
Definition: Minimizer.h:41
#define SG_WARNING(...)
Definition: SGIO.h:128
The Fully Independent Conditional Training inference base class for Laplace and regression for 1-D la...
CLikelihoodModel * m_model
Definition: Inference.h:475
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:262
The Likelihood model base class.
virtual SGMatrix< float64_t > get_posterior_covariance()
SGVector< float64_t > m_alpha
Definition: Inference.h:484
static const float64_t PI
Definition: Math.h:2055

SHOGUN Machine Learning Toolbox - Documentation