SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
InferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Roman Votyakov
8  * Written (W) 2013 Heiko Strathmann
9  * Copyright (C) 2012 Jacob Walker
10  * Copyright (C) 2013 Roman Votyakov
11  */
12 
13 #include <shogun/lib/config.h>
14 
15 #ifdef HAVE_EIGEN3
16 
20 #include <shogun/lib/Lock.h>
21 
22 using namespace shogun;
23 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/** Argument bundle handed to each gradient worker spawned (or invoked
 * inline) by get_negative_log_marginal_likelihood_derivatives(). */
struct GRADIENT_THREAD_PARAM
{
	/* inference method whose get_derivative_wrt_* helpers are called */
	CInferenceMethod* inf;
	/* object owning the parameter: inf itself, its kernel, mean or model */
	CSGObject* obj;
	/* parameter to differentiate the negative log marginal likelihood wrt */
	TParameter* param;
	/* serialises insertion of results into the shared gradient map */
	CLock* lock;
	/* NOTE(review): later code assigns a `grad` member (the shared result
	 * map); that field was lost in extraction (doxygen line 28) -- confirm
	 * against the full source. */
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */
34 
/* Default constructor: only registers parameters / zeroes members via
 * init(). NOTE(review): the signature line (doxygen 35) was lost in
 * extraction -- presumably CInferenceMethod::CInferenceMethod(); confirm. */
{
	init();
}
39 
/* Returns a shallow copy of the cached E matrix after refreshing state.
 * NOTE(review): the signature line (doxygen 40) and one line before
 * update() (doxygen 42 -- possibly a parameter-hash guard) are missing
 * from this extract; confirm against the full source. */
{
	update();

	/* wrap rather than deep-copy: SGMatrix shares the underlying buffer */
	return SGMatrix<float64_t>(m_E);
}
47 
/* Fully-specified constructor: registers/zeroes members, then installs
 * the user-supplied components via their setters.
 * NOTE(review): the first line of the signature (doxygen 48, carrying the
 * kernel and features parameters) was lost in extraction -- confirm. */
	CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
{
	/* must run first so members start from a clean NULL state */
	init();

	set_kernel(kernel);
	set_features(features);
	set_labels(labels);
	set_model(model);
	set_mean(mean);
}
59 
/* Destructor. NOTE(review): the signature (doxygen 60) and the body lines
 * (doxygen 62-66) are missing from this extract -- presumably SG_UNREF of
 * the member objects set in the constructor; confirm before relying on
 * this rendering. */
{
}
68 
69 void CInferenceMethod::init()
70 {
71  SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
72  SG_ADD(&m_scale, "scale", "Kernel scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
73  SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
74  MS_AVAILABLE);
75  SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
76  SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
77  SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);
78 
79  m_kernel=NULL;
80  m_model=NULL;
81  m_labels=NULL;
82  m_features=NULL;
83  m_mean=NULL;
84  m_scale=1.0;
85 
86  SG_ADD(&m_alpha, "alpha", "alpha vector used in process mean calculation", MS_NOT_AVAILABLE);
87  SG_ADD(&m_L, "L", "upper triangular factor of Cholesky decomposition", MS_NOT_AVAILABLE);
88  SG_ADD(&m_E, "E", "the matrix used for multi classification", MS_NOT_AVAILABLE);
89 }
90 
/* Importance-sampling estimate of the log marginal likelihood: draw
 * samples from the Gaussian posterior approximation, score them under
 * approximation, prior and likelihood, and combine via log-mean-exp.
 * NOTE(review): the signature line and the declarations of cov, mean,
 * scaled_kernel, prior and log_likelihood (doxygen 91, 95, 101, 117, 119,
 * 127, 134) were lost in extraction; the comments below only describe the
 * visible statements. */
	int32_t num_importance_samples, float64_t ridge_size)
{
	/* sample from Gaussian approximation to q(f|y) */

	/* add ridge to the diagonal so the covariance stays positive definite */
	for (index_t i=0; i<cov.num_rows; ++i)
		cov(i,i)+=ridge_size;

	CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
	SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);

	/* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
	 * log pdf of approximation, prior and likelihood */

	/* log pdf q(f^i|y) */
	SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);

	/* dont need gaussian anymore, free memory */
	SG_UNREF(post_approx);
	post_approx=NULL;

	/* log pdf p(f^i|\theta) and free memory afterwise. Scale kernel before */
	/* NOTE(review): the byte-count argument of this memcpy (doxygen 119)
	 * is missing from the extract */
	memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
	for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
		scaled_kernel.matrix[i]*=CMath::sq(m_scale);

	/* add ridge */
	/* NOTE(review): loop bound is cov.num_rows while scaled_kernel is
	 * mutated -- looks intentional only if both matrices share dimensions;
	 * confirm, else use scaled_kernel.num_rows */
	for (index_t i=0; i<cov.num_rows; ++i)
		scaled_kernel(i,i)+=ridge_size;

	/* NOTE(review): declaration line of `prior` (doxygen 127) is missing */
		m_mean->get_mean_vector(m_features), scaled_kernel);
	SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
	SG_UNREF(prior);
	prior=NULL;

	/* p(y|f^i) */
	/* NOTE(review): declaration line of `log_likelihood` (doxygen 134) is
	 * missing */
		m_labels, samples);

	/* combine probabilities */
	ASSERT(log_likelihood.vlen==num_importance_samples);
	ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
	ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
	SGVector<float64_t> sum(log_likelihood);
	for (index_t i=0; i<log_likelihood.vlen; ++i)
		sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];

	/* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
	return CMath::log_mean_exp(sum);
}
148 
/* Computes, for every requested parameter, the derivative of the negative
 * log marginal likelihood, fanning the work out over pthreads when more
 * than one derivative is requested and pthreads are available.
 * NOTE(review): the signature line (doxygen 150), one line before
 * update() (doxygen 155) and the declaration of `result` (doxygen 162,
 * presumably `CMap<TParameter*, SGVector<float64_t> >* result=`) were
 * lost in extraction. Returned map is SG_REF'd; caller must SG_UNREF. */
{
	REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
		"than zero\n")

	update();

	// get number of derivatives
	const index_t num_deriv=params->get_num_elements();

	// create map of derivatives
		new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);

	SG_REF(result);

	// create lock object
	CLock lock;

#ifdef HAVE_PTHREAD
	/* single derivative: not worth a thread, compute inline */
	if (num_deriv<2)
	{
#endif /* HAVE_PTHREAD */
		for (index_t i=0; i<num_deriv; i++)
		{
			CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);

			GRADIENT_THREAD_PARAM thread_params;

			thread_params.inf=this;
			thread_params.obj=node->data;
			thread_params.param=node->key;
			thread_params.grad=result;
			thread_params.lock=&lock;

			/* same entry point as the threaded path, called directly */
			get_derivative_helper((void*) &thread_params);
		}
#ifdef HAVE_PTHREAD
	}
	else
	{
		/* one thread per derivative; params array must outlive the joins */
		pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
		GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
			num_deriv);

		for (index_t t=0; t<num_deriv; t++)
		{
			CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);

			thread_params[t].inf=this;
			thread_params[t].obj=node->data;
			thread_params[t].param=node->key;
			thread_params[t].grad=result;
			thread_params[t].lock=&lock;

			pthread_create(&threads[t], NULL, CInferenceMethod::get_derivative_helper,
				(void*)&thread_params[t]);
		}

		for (index_t t=0; t<num_deriv; t++)
			pthread_join(threads[t], NULL);

		SG_FREE(thread_params);
		SG_FREE(threads);
	}
#endif /* HAVE_PTHREAD */

	return result;
}
220 
/* Thread entry point: unpacks a GRADIENT_THREAD_PARAM, dispatches to the
 * derivative routine matching the parameter's owning object, and stores
 * the gradient in the shared map under the lock.
 * NOTE(review): the signature line (doxygen 221, presumably
 * `void* CInferenceMethod::get_derivative_helper(void* p)`) was lost in
 * extraction. */
{
	GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;

	CInferenceMethod* inf=thread_param->inf;
	CSGObject* obj=thread_param->obj;
	CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
	TParameter* param=thread_param->param;
	CLock* lock=thread_param->lock;

	REQUIRE(param, "Parameter should not be NULL\n");
	REQUIRE(obj, "Object of the parameter should not be NULL\n");

	SGVector<float64_t> gradient;

	/* dispatch on which component owns the parameter */
	if (obj==inf)
	{
		// try to find dervative wrt InferenceMethod.parameter
		gradient=inf->get_derivative_wrt_inference_method(param);
	}
	else if (obj==inf->m_model)
	{
		// try to find derivative wrt LikelihoodModel.parameter
		gradient=inf->get_derivative_wrt_likelihood_model(param);
	}
	else if (obj==inf->m_kernel)
	{
		// try to find derivative wrt Kernel.parameter
		gradient=inf->get_derivative_wrt_kernel(param);
	}
	else if (obj==inf->m_mean)
	{
		// try to find derivative wrt MeanFunction.parameter
		gradient=inf->get_derivative_wrt_mean(param);
	}
	else
	{
		SG_SERROR("Can't compute derivative of negative log marginal "
			"likelihood wrt %s.%s", obj->get_name(), param->m_name);
	}

	/* map insertion is the only shared-state mutation; keep the critical
	 * section minimal */
	lock->lock();
	grad->add(param, gradient);
	lock->unlock();

	return NULL;
}
268 
/* Recomputes cached state; validates members first.
 * NOTE(review): the signature line (doxygen 269) and one body line after
 * check_members() (doxygen 272, possibly a kernel-update call) were lost
 * in extraction -- confirm against the full source. */
{
	check_members();
}
274 
/* Validates that all required members are set and mutually consistent,
 * failing via REQUIRE with a descriptive message otherwise.
 * NOTE(review): the opening lines of four REQUIRE invocations (doxygen
 * 278, 281, 283, 286 -- the condition expressions) were lost in
 * extraction; only their message continuations remain below. */
{
	REQUIRE(m_features, "Training features should not be NULL\n")
		"Number of training features must be greater than zero\n")
	REQUIRE(m_labels, "Labels should not be NULL\n")
		"Number of labels must be greater than zero\n")
		"Number of training vectors must match number of labels, which is "
		"%d, but number of training vectors is %d\n",
	REQUIRE(m_kernel, "Kernel should not be NULL\n")
	REQUIRE(m_mean, "Mean function should not be NULL\n")
}
290 
/* NOTE(review): the signature (doxygen 291) and the entire body (doxygen
 * 293-294) of this function were lost in extraction -- presumably it
 * initialises m_kernel on m_features and caches the kernel matrix into
 * m_ktrtr (used above); confirm against the full source. */
{
}
296 
297 #endif /* HAVE_EIGEN3 */

SHOGUN Machine Learning Toolbox - Documentation