SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
InferenceMethod.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Roman Votyakov
8  * Written (W) 2013 Heiko Strathmann
9  * Copyright (C) 2012 Jacob Walker
10  * Copyright (C) 2013 Roman Votyakov
11  */
12 
13 #include <shogun/lib/config.h>
14 
15 #ifdef HAVE_EIGEN3
16 
20 #include <shogun/lib/Lock.h>
21 
22 using namespace shogun;
23 
24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
25 struct GRADIENT_THREAD_PARAM
26 {
27  CInferenceMethod* inf;
29  CSGObject* obj;
30  TParameter* param;
31  CLock* lock;
32 };
33 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
34 
// Default constructor: only registers parameters and default-initialises
// members via init().
// NOTE(review): the signature line (Doxygen line 35) is missing from this
// extract — presumably CInferenceMethod::CInferenceMethod(); confirm
// against the repository before editing.
36 {
37  init();
38 }
39 
// Returns the matrix m_E used for multi-class classification, refreshing
// the posterior first so the cached value is current.
// NOTE(review): Doxygen lines 40 and 42 are missing from this extract —
// line 40 is the signature (SGMatrix<float64_t>
// CInferenceMethod::get_multiclass_E(), per the reference index) and
// line 42 is presumably an `if (parameter_hash_changed())` guard around
// update() — confirm against the repository.
41 {
43  update();
44 
45  return SGMatrix<float64_t>(m_E);
46 }
47 
49  CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
50 {
51  init();
52 
53  set_kernel(kernel);
54  set_features(features);
55  set_labels(labels);
56  set_model(model);
57  set_mean(mean);
58 }
59 
// Destructor.
// NOTE(review): Doxygen lines 60 and 62-66 are missing from this extract —
// line 60 is the signature (CInferenceMethod::~CInferenceMethod()) and
// lines 62-66 presumably SG_UNREF the owned members (kernel, features,
// labels, model, mean) — confirm against the repository.
61 {
67 }
68 
69 void CInferenceMethod::init()
70 {
71  SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
72  SG_ADD(&m_scale, "scale", "Kernel scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
73  SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
74  MS_AVAILABLE);
75  SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
76  SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
77  SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);
78 
79  m_kernel=NULL;
80  m_model=NULL;
81  m_labels=NULL;
82  m_features=NULL;
83  m_mean=NULL;
84  m_scale=1.0;
85 
86  SG_ADD(&m_alpha, "alpha", "alpha vector used in process mean calculation", MS_NOT_AVAILABLE);
87  SG_ADD(&m_L, "L", "upper triangular factor of Cholesky decomposition", MS_NOT_AVAILABLE);
88  SG_ADD(&m_E, "E", "the matrix used for multi classification", MS_NOT_AVAILABLE);
89 }
90 
// Importance-sampling estimate of the log marginal likelihood log p(y):
// draws samples from the Gaussian approximation to the posterior q(f|y),
// evaluates prior, likelihood and proposal log-pdfs on them, and combines
// the ratios via a log-mean-exp.
// NOTE(review): several Doxygen lines are missing from this extract and
// must be confirmed against the repository: 91 (first signature line —
// float64_t CInferenceMethod::get_marginal_likelihood_estimate(..., per
// the reference index the defaults are num_importance_samples=1,
// ridge_size=1e-15), 95 (declaration of `cov`, presumably from
// get_posterior_covariance()), 101 (declaration of `mean`, presumably from
// get_posterior_mean()), 117 (declaration of `scaled_kernel`), 119 (the
// memcpy size argument), 127 (first line of the `prior` declaration) and
// 134 (first line of the `log_likelihood` declaration).
92  int32_t num_importance_samples, float64_t ridge_size)
93 {
94  /* sample from Gaussian approximation to q(f|y) */
96 
97  /* add ridge to the diagonal for numerical stability */
98  for (index_t i=0; i<cov.num_rows; ++i)
99  cov(i,i)+=ridge_size;
100 
102 
103  CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
104  SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);
105 
106  /* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
107  * log pdf of approximation, prior and likelihood */
108 
109  /* log pdf q(f^i|y) */
110  SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);
111 
112  /* don't need the Gaussian anymore, free memory */
113  SG_UNREF(post_approx);
114  post_approx=NULL;
115 
116  /* log pdf p(f^i|\theta) and free memory afterwards. Scale kernel before */
118  memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
120  for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
121  scaled_kernel.matrix[i]*=CMath::sq(m_scale);
122 
123  /* add ridge (NOTE(review): the bound uses cov.num_rows rather than
     * scaled_kernel.num_rows — presumably they are equal; confirm) */
124  for (index_t i=0; i<cov.num_rows; ++i)
125  scaled_kernel(i,i)+=ridge_size;
126 
128  m_mean->get_mean_vector(m_features), scaled_kernel);
129  SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
130  SG_UNREF(prior);
131  prior=NULL;
132 
133  /* p(y|f^i) */
135  m_labels, samples);
136 
137  /* combine probabilities */
138  ASSERT(log_likelihood.vlen==num_importance_samples);
139  ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
140  ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
    /* NOTE(review): SGVector's copy constructor appears to share the
     * underlying buffer, so the loop below likely also overwrites
     * log_likelihood in place — confirm this is intended. */
141  SGVector<float64_t> sum(log_likelihood);
142  for (index_t i=0; i<log_likelihood.vlen; ++i)
143  sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];
144 
145  /* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
146  return CMath::log_mean_exp(sum);
147 }
148 
// Computes the derivative of the negative log marginal likelihood with
// respect to every parameter in `params`, returning a ref-counted map
// parameter -> gradient vector. With pthreads available and two or more
// parameters, one thread per parameter is spawned; otherwise the work is
// done sequentially in this thread.
// NOTE(review): Doxygen lines 149-150, 155 and 162 are missing from this
// extract — 149-150 are the signature (CMap<TParameter*,
// SGVector<float64_t> >* CInferenceMethod::
// get_negative_log_marginal_likelihood_derivatives(CMap<TParameter*,
// CSGObject*>* params), per the reference index), 155 is presumably an
// `if (parameter_hash_changed())` guard around update(), and 162 is the
// first line of the `result` declaration — confirm against the repository.
151 {
152  REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
153  "than zero\n")
154 
156  update();
157 
158  // get number of derivatives
159  const index_t num_deriv=params->get_num_elements();
160 
161  // create map of derivatives (returned to the caller with one reference)
163  new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);
164 
165  SG_REF(result);
166 
167  // create lock object shared by all workers to serialise result->add()
168  CLock lock;
169 
    // NOTE: the #ifdef below deliberately opens a brace that is closed in
    // the matching #ifdef further down, so the sequential loop is used both
    // without pthreads and when there are fewer than two derivatives.
170 #ifdef HAVE_PTHREAD
171  if (num_deriv<2)
172  {
173 #endif /* HAVE_PTHREAD */
    // sequential path: run the helper inline for each parameter
174  for (index_t i=0; i<num_deriv; i++)
175  {
176  CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);
177 
178  GRADIENT_THREAD_PARAM thread_params;
179 
180  thread_params.inf=this;
181  thread_params.obj=node->data;
182  thread_params.param=node->key;
183  thread_params.grad=result;
184  thread_params.lock=&lock;
185 
186  get_derivative_helper((void*) &thread_params);
187  }
188 #ifdef HAVE_PTHREAD
189  }
190  else
191  {
    // parallel path: one pthread per requested derivative
192  pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
193  GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
194  num_deriv);
195 
196  for (index_t t=0; t<num_deriv; t++)
197  {
198  CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);
199 
200  thread_params[t].inf=this;
201  thread_params[t].obj=node->data;
202  thread_params[t].param=node->key;
203  thread_params[t].grad=result;
204  thread_params[t].lock=&lock;
205 
206  pthread_create(&threads[t], NULL, CInferenceMethod::get_derivative_helper,
207  (void*)&thread_params[t]);
208  }
209 
    // wait for all workers before freeing their argument bundles
210  for (index_t t=0; t<num_deriv; t++)
211  pthread_join(threads[t], NULL);
212 
213  SG_FREE(thread_params);
214  SG_FREE(threads);
215  }
216 #endif /* HAVE_PTHREAD */
217 
218  return result;
219 }
220 
222 {
223  GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;
224 
225  CInferenceMethod* inf=thread_param->inf;
226  CSGObject* obj=thread_param->obj;
227  CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
228  TParameter* param=thread_param->param;
229  CLock* lock=thread_param->lock;
230 
231  REQUIRE(param, "Parameter should not be NULL\n");
232  REQUIRE(obj, "Object of the parameter should not be NULL\n");
233 
234  SGVector<float64_t> gradient;
235 
236  if (obj==inf)
237  {
238  // try to find dervative wrt InferenceMethod.parameter
239  gradient=inf->get_derivative_wrt_inference_method(param);
240  }
241  else if (obj==inf->m_model)
242  {
243  // try to find derivative wrt LikelihoodModel.parameter
244  gradient=inf->get_derivative_wrt_likelihood_model(param);
245  }
246  else if (obj==inf->m_kernel)
247  {
248  // try to find derivative wrt Kernel.parameter
249  gradient=inf->get_derivative_wrt_kernel(param);
250  }
251  else if (obj==inf->m_mean)
252  {
253  // try to find derivative wrt MeanFunction.parameter
254  gradient=inf->get_derivative_wrt_mean(param);
255  }
256  else
257  {
258  SG_SERROR("Can't compute derivative of negative log marginal "
259  "likelihood wrt %s.%s", obj->get_name(), param->m_name);
260  }
261 
262  lock->lock();
263  grad->add(param, gradient);
264  lock->unlock();
265 
266  return NULL;
267 }
268 
// Validates member state before any (re)computation of the posterior.
// NOTE(review): Doxygen lines 269 and 272 are missing from this extract —
// line 269 is the signature (void CInferenceMethod::update()) and line 272
// is presumably a call to update_train_kernel() — confirm against the
// repository.
270 {
271  check_members();
273 }
274 
// Sanity-checks that all required members (features, labels, kernel, mean
// function) are set and mutually consistent, failing with REQUIRE otherwise.
// NOTE(review): Doxygen lines 275, 278, 281, 283 and 286 are missing from
// this extract — 275 is the signature (void CInferenceMethod::
// check_members() const, per the reference index); the others are the
// first lines of the REQUIRE calls whose continuation strings are visible
// below (presumably checks on get_num_vectors()/get_num_labels() counts)
// — confirm against the repository before editing.
276 {
277  REQUIRE(m_features, "Training features should not be NULL\n")
279  "Number of training features must be greater than zero\n")
280  REQUIRE(m_labels, "Labels should not be NULL\n")
282  "Number of labels must be greater than zero\n")
284  "Number of training vectors must match number of labels, which is "
285  "%d, but number of training vectors is %d\n",
287  REQUIRE(m_kernel, "Kernel should not be NULL\n")
288  REQUIRE(m_mean, "Mean function should not be NULL\n")
289 }
290 
// Recomputes the cached training kernel matrix m_ktrtr.
// NOTE(review): Doxygen lines 291 and 293-294 are missing from this
// extract — 291 is the signature (void CInferenceMethod::
// update_train_kernel()) and 293-294 presumably initialise the kernel on
// the training features and fetch the matrix (m_kernel->init(m_features,
// m_features); m_ktrtr=m_kernel->get_kernel_matrix(); per the reference
// index entries) — confirm against the repository.
292 {
295 }
296 
297 #endif /* HAVE_EIGEN3 */
virtual void set_labels(CLabels *lab)
virtual const char * get_name() const =0
virtual void set_model(CLikelihoodModel *mod)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
Definition: Kernel.cpp:98
SGVector< float64_t > m_alpha
The Inference Method base class.
virtual void set_features(CFeatures *feat)
int32_t index_t
Definition: common.h:62
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const =0
static T sq(T x)
Definition: Math.h:450
parameter struct
Definition: Parameter.h:32
virtual int32_t get_num_vectors() const =0
#define REQUIRE(x,...)
Definition: SGIO.h:206
void unlock()
Definition: Lock.cpp:64
index_t num_cols
Definition: SGMatrix.h:378
virtual SGVector< float64_t > get_mean_vector(const CFeatures *features) const =0
An abstract class of the mean function.
Definition: MeanFunction.h:28
virtual SGVector< float64_t > get_derivative_wrt_likelihood_model(const TParameter *param)=0
SGMatrix< float64_t > get_kernel_matrix()
Definition: Kernel.h:219
#define SG_REF(x)
Definition: SGObject.h:51
index_t num_rows
Definition: SGMatrix.h:376
CMapNode< K, T > * get_node_ptr(int32_t index)
Definition: Map.h:247
int32_t add(const K &key, const T &data)
Definition: Map.h:101
index_t vlen
Definition: SGVector.h:494
SGMatrix< float64_t > m_L
virtual SGVector< float64_t > get_log_probability_fmatrix(const CLabels *lab, SGMatrix< float64_t > F) const
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
Class Lock used for synchronization in concurrent programs.
Definition: Lock.h:17
virtual SGMatrix< float64_t > get_multiclass_E()
double float64_t
Definition: common.h:50
SGMatrix< float64_t > m_E
virtual SGVector< float64_t > get_derivative_wrt_inference_method(const TParameter *param)=0
virtual void update_train_kernel()
virtual SGVector< float64_t > get_derivative_wrt_kernel(const TParameter *param)=0
virtual SGVector< float64_t > log_pdf_multiple(SGMatrix< float64_t > samples) const
virtual void set_kernel(CKernel *kern)
float64_t get_marginal_likelihood_estimate(int32_t num_importance_samples=1, float64_t ridge_size=1e-15)
#define SG_UNREF(x)
Definition: SGObject.h:52
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
Dense version of the well-known Gaussian probability distribution, defined as .
virtual SGVector< float64_t > get_derivative_wrt_mean(const TParameter *param)=0
virtual void set_mean(CMeanFunction *m)
virtual SGMatrix< float64_t > get_posterior_covariance()=0
The class Features is the base class of all feature objects.
Definition: Features.h:68
#define SG_SERROR(...)
Definition: SGIO.h:179
virtual void check_members() const
virtual SGVector< float64_t > get_posterior_mean()=0
The Kernel base class.
Definition: Kernel.h:158
static T log_mean_exp(SGVector< T > values)
Definition: Math.h:1285
int32_t get_num_elements() const
Definition: Map.h:211
#define SG_ADD(...)
Definition: SGObject.h:81
virtual bool parameter_hash_changed()
Definition: SGObject.cpp:263
void lock()
Definition: Lock.cpp:57
The Likelihood model base class.
SGMatrix< float64_t > m_ktrtr
virtual CMap< TParameter *, SGVector< float64_t > > * get_negative_log_marginal_likelihood_derivatives(CMap< TParameter *, CSGObject * > *parameters)
CLikelihoodModel * m_model
the class CMap, a map based on the hash-table. w: http://en.wikipedia.org/wiki/Hash_table ...
Definition: SGObject.h:36
virtual SGMatrix< float64_t > sample(int32_t num_samples, SGMatrix< float64_t > pre_samples=SGMatrix< float64_t >()) const
static void * get_derivative_helper(void *p)

SHOGUN Machine Learning Toolbox - Documentation