SHOGUN  3.2.1
InferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 * Written (W) 2013 Heiko Strathmann
 * Copyright (C) 2012 Jacob Walker
 * Copyright (C) 2013 Roman Votyakov
 */

#include <shogun/lib/config.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/InferenceMethod.h>
#include <shogun/distributions/classical/GaussianDistribution.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Lock.h>

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
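/* Parameter bundle handed to each gradient thread: the inference method, the
 * object/parameter pair to differentiate, the shared result map, and a lock
 * guarding concurrent writes to that map. */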
struct GRADIENT_THREAD_PARAM
{
    CInferenceMethod* inf;
    CMap<TParameter*, SGVector<float64_t> >* grad;
    CSGObject* obj;
    TParameter* param;
    CLock* lock;
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

CInferenceMethod::CInferenceMethod()
{
    init();
}

CInferenceMethod::CInferenceMethod(CKernel* kernel, CFeatures* features,
        CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
{
    init();

    set_kernel(kernel);
    set_features(features);
    set_labels(labels);
    set_model(model);
    set_mean(mean);
}

CInferenceMethod::~CInferenceMethod()
{
    SG_UNREF(m_kernel);
    SG_UNREF(m_features);
    SG_UNREF(m_labels);
    SG_UNREF(m_model);
    SG_UNREF(m_mean);
}

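/* Note: SG_ADD registers each member with Shogun's parameter framework;
 * MS_AVAILABLE exposes a parameter to model selection, and GRADIENT_AVAILABLE
 * additionally to gradient-based hyperparameter optimisation. */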
void CInferenceMethod::init()
{
    SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
    SG_ADD(&m_scale, "scale", "Kernel scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
    SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
            MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);

    m_kernel=NULL;
    m_model=NULL;
    m_labels=NULL;
    m_features=NULL;
    m_mean=NULL;
    m_scale=1.0;
}

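/* The estimator below is plain importance sampling in the log-domain: with
 * samples f^i drawn from the Gaussian posterior approximation q(f|y),
 *
 *   log p(y|theta) ~ log 1/N sum_i exp( log p(y|f^i) + log p(f^i|theta)
 *                                       - log q(f^i|y) ),
 *
 * evaluated stably via the log-mean-exp trick at the end of the method. */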
float64_t CInferenceMethod::get_marginal_likelihood_estimate(
        int32_t num_importance_samples, float64_t ridge_size)
{
    /* sample from Gaussian approximation to q(f|y) */
    SGMatrix<float64_t> cov=get_posterior_covariance();

    /* add ridge */
    for (index_t i=0; i<cov.num_rows; ++i)
        cov(i,i)+=ridge_size;

    SGVector<float64_t> mean=get_posterior_mean();

    CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
    SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);

    /* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
     * log pdf of approximation, prior and likelihood */

    /* log pdf q(f^i|y) */
    SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);

    /* don't need the Gaussian anymore, free memory */
    SG_UNREF(post_approx);
    post_approx=NULL;

    /* log pdf p(f^i|\theta); scale the kernel matrix beforehand and free
     * memory afterwards */
    SGMatrix<float64_t> scaled_kernel(m_ktrtr.num_rows, m_ktrtr.num_cols);
    memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
            sizeof(float64_t)*m_ktrtr.num_rows*m_ktrtr.num_cols);
    for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
        scaled_kernel.matrix[i]*=CMath::sq(m_scale);

    /* add ridge */
    for (index_t i=0; i<cov.num_rows; ++i)
        scaled_kernel(i,i)+=ridge_size;

    CGaussianDistribution* prior=new CGaussianDistribution(
            m_mean->get_mean_vector(m_features), scaled_kernel);
    SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
    SG_UNREF(prior);
    prior=NULL;

    /* log pdf p(y|f^i) */
    SGVector<float64_t> log_likelihood=m_model->get_log_probability_fmatrix(
            m_labels, samples);

    /* combine probabilities */
    ASSERT(log_likelihood.vlen==num_importance_samples);
    ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
    ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
    SGVector<float64_t> sum(log_likelihood);
    for (index_t i=0; i<log_likelihood.vlen; ++i)
        sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];

    /* use the log-sum-exp trick (here, log-mean-exp) to average the
     * importance weights without numerical overflow */
    return CMath::log_mean_exp(sum);
}

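/* Computes the derivative of the negative log marginal likelihood w.r.t.
 * every parameter in the supplied map. If pthreads are available and there
 * is more than one parameter, one thread per parameter is spawned; each
 * thread writes its gradient into the shared result map under a lock. */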
CMap<TParameter*, SGVector<float64_t> >* CInferenceMethod::
get_negative_log_marginal_likelihood_derivatives(CMap<TParameter*, CSGObject*>* params)
{
    REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
            "than zero\n")

    // update the model only if parameters have changed
    if (update_parameter_hash())
        update();

    // get number of derivatives
    const index_t num_deriv=params->get_num_elements();

    // create map of derivatives
    CMap<TParameter*, SGVector<float64_t> >* result=
        new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);

    SG_REF(result);

    // create lock object
    CLock lock;

#ifdef HAVE_PTHREAD
    if (num_deriv<2)
    {
#endif /* HAVE_PTHREAD */
        for (index_t i=0; i<num_deriv; i++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);

            GRADIENT_THREAD_PARAM thread_params;

            thread_params.inf=this;
            thread_params.obj=node->data;
            thread_params.param=node->key;
            thread_params.grad=result;
            thread_params.lock=&lock;

            get_derivative_helper((void*) &thread_params);
        }
#ifdef HAVE_PTHREAD
    }
    else
    {
        pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
        GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
                num_deriv);

        for (index_t t=0; t<num_deriv; t++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);

            thread_params[t].inf=this;
            thread_params[t].obj=node->data;
            thread_params[t].param=node->key;
            thread_params[t].grad=result;
            thread_params[t].lock=&lock;

            pthread_create(&threads[t], NULL, CInferenceMethod::get_derivative_helper,
                    (void*)&thread_params[t]);
        }

        for (index_t t=0; t<num_deriv; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(thread_params);
        SG_FREE(threads);
    }
#endif /* HAVE_PTHREAD */

    return result;
}

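/* Thread entry point: dispatches on the object that owns the parameter
 * (the inference method itself, the likelihood model, the kernel, or the
 * mean function) and stores the resulting gradient in the shared map. */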
void* CInferenceMethod::get_derivative_helper(void* p)
{
    GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;

    CInferenceMethod* inf=thread_param->inf;
    CSGObject* obj=thread_param->obj;
    CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
    TParameter* param=thread_param->param;
    CLock* lock=thread_param->lock;

    REQUIRE(param, "Parameter should not be NULL\n");
    REQUIRE(obj, "Object of the parameter should not be NULL\n");

    SGVector<float64_t> gradient;

    if (obj==inf)
    {
        // try to find derivative wrt InferenceMethod.parameter
        gradient=inf->get_derivative_wrt_inference_method(param);
    }
    else if (obj==inf->m_model)
    {
        // try to find derivative wrt LikelihoodModel.parameter
        gradient=inf->get_derivative_wrt_likelihood_model(param);
    }
    else if (obj==inf->m_kernel)
    {
        // try to find derivative wrt Kernel.parameter
        gradient=inf->get_derivative_wrt_kernel(param);
    }
    else if (obj==inf->m_mean)
    {
        // try to find derivative wrt MeanFunction.parameter
        gradient=inf->get_derivative_wrt_mean(param);
    }
    else
    {
        SG_SERROR("Can't compute derivative of negative log marginal "
                "likelihood wrt %s.%s", obj->get_name(), param->m_name);
    }

    lock->lock();
    grad->add(param, gradient);
    lock->unlock();

    return NULL;
}

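/* Recompute cached quantities after a parameter change: validate the members,
 * then refresh the training kernel matrix. */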
void CInferenceMethod::update()
{
    check_members();
    update_train_kernel();
}

void CInferenceMethod::check_members() const
{
    REQUIRE(m_features, "Training features should not be NULL\n")
    REQUIRE(m_features->get_num_vectors(),
            "Number of training features must be greater than zero\n")
    REQUIRE(m_labels, "Labels should not be NULL\n")
    REQUIRE(m_labels->get_num_labels(),
            "Number of labels must be greater than zero\n")
    REQUIRE(m_labels->get_num_labels()==m_features->get_num_vectors(),
            "Number of training vectors must match number of labels, which is "
            "%d, but number of training vectors is %d\n",
            m_labels->get_num_labels(), m_features->get_num_vectors())
    REQUIRE(m_kernel, "Kernel should not be NULL\n")
    REQUIRE(m_mean, "Mean function should not be NULL\n")
}

void CInferenceMethod::update_train_kernel()
{
    m_kernel->init(m_features, m_features);
    m_ktrtr=m_kernel->get_kernel_matrix();
}

#endif /* HAVE_EIGEN3 */
