SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QuadraticTimeMMD.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2012-2013 Heiko Strathmann
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
35 #include <shogun/kernel/Kernel.h>
38 
39 using namespace shogun;
40 #ifdef HAVE_EIGEN3
41 using namespace Eigen;
42 #endif
43 
/* Default constructor. NOTE(review): the signature line was lost in
 * extraction — presumably CQuadraticTimeMMD::CQuadraticTimeMMD(); confirm
 * against the upstream file. Only registers parameters via init(). */
45 {
46  init();
47 }
48 
/* Constructor taking a kernel, the concatenated samples p_and_q, and the
 * number m of samples from p. NOTE(review): the first signature line was
 * lost in extraction. Sample/kernel bookkeeping is delegated to the
 * CKernelTwoSampleTest base; init() then registers this class' parameters. */
50  index_t m) :
51  CKernelTwoSampleTest(kernel, p_and_q, m)
52 {
53  init();
54 }
55 
/* Constructor taking a kernel and the two sample sets p and q separately.
 * NOTE(review): the first signature line was lost in extraction. The base
 * class handles merging/ownership of the features; init() registers
 * parameters. */
57  CFeatures* q) : CKernelTwoSampleTest(kernel, p, q)
58 {
59  init();
60 }
61 
/* Constructor from a precomputed (custom) kernel and the number m of samples
 * from p. No feature objects are needed in this case, hence NULL is passed
 * to the base class. NOTE(review): the first signature line was lost in
 * extraction. */
63  CKernelTwoSampleTest(custom_kernel, NULL, m)
64 {
65  init();
66 }
67 
/* Destructor. NOTE(review): the signature line was lost in extraction —
 * presumably CQuadraticTimeMMD::~CQuadraticTimeMMD(). Empty body: kernel and
 * feature references are released by the base class. */
69 {
70 }
71 
/* Registers the class' tunable members with Shogun's parameter framework.
 * NOTE(review): several lines were lost in extraction — the trailing
 * arguments of the first two SG_ADD calls (closing "MS_NOT_AVAILABLE);")
 * and the default-value assignments of the members (original lines 83-85).
 * This listing is incomplete; do not edit without the upstream file. */
72 void CQuadraticTimeMMD::init()
73 {
74  SG_ADD(&m_num_samples_spectrum, "num_samples_spectrum", "Number of samples"
75  " for spectrum method null-distribution approximation",
77  SG_ADD(&m_num_eigenvalues_spectrum, "num_eigenvalues_spectrum", "Number of "
78  " Eigenvalues for spectrum method null-distribution approximation",
80  SG_ADD((machine_int_t*)&m_statistic_type, "statistic_type",
81  "Biased or unbiased MMD", MS_NOT_AVAILABLE);
82 
86 }
87 
/* Computes the unbiased quadratic-time MMD estimate MMD_u over the joint
 * sample (first m vectors are from p, the remaining n from q), as a sum of
 * three pairwise-kernel terms; returns m*MMD_u when m==n and (m+n)*MMD_u
 * otherwise. NOTE(review): the signature line was lost in extraction —
 * presumably float64_t CQuadraticTimeMMD::compute_unbiased_statistic(). */
89 {
90  /* split computations into three terms from JLMR paper (see documentation )*/
91  index_t m=m_m;
92  index_t n=0;
93 
94  /* check if kernel is precomputed (custom kernel) */
/* NOTE(review): the if-branch for the precomputed-kernel case was lost in
 * extraction — presumably n=m_kernel->get_num_vec_lhs()-m, mirroring the
 * biased variant; confirm against upstream. */
97  else
98  {
99  REQUIRE(m_p_and_q, "The samples are not initialized!\n");
100  n=m_p_and_q->get_num_vectors()-m;
101  }
102 
103  SG_DEBUG("Computing MMD_u with %d samples from p and %d samples from q\n",
104  m, n);
105 
106  /* init kernel with features */
/* NOTE(review): the kernel-initialization statement itself was lost in
 * extraction. */
108 
109  /* first term: mean kernel value over within-p pairs, diagonal excluded */
110  float64_t first=0;
111  for (index_t i=0; i<m; ++i)
112  {
113  /* ensure i!=j while adding up */
114  for (index_t j=0; j<m; ++j)
115  first+=i==j ? 0 : m_kernel->kernel(i,j);
116  }
/* m*(m-1) off-diagonal pairs were summed, so divide by that count */
117  first/=m*(m-1);
118 
119  /* second term: same for within-q pairs (indices m..m+n-1) */
120  float64_t second=0;
121  for (index_t i=m; i<m+n; ++i)
122  {
123  /* ensure i!=j while adding up */
124  for (index_t j=m; j<m+n; ++j)
125  second+=i==j ? 0 : m_kernel->kernel(i,j);
126  }
127  second/=n*(n-1);
128 
129  /* third term: cross pairs between p and q, scaled by 2/(m*n) */
130  float64_t third=0;
131  for (index_t i=0; i<m; ++i)
132  {
133  for (index_t j=m; j<m+n; ++j)
134  third+=m_kernel->kernel(i,j);
135  }
136  third*=2.0/m/n;
137 
138  SG_DEBUG("first=%f, second=%f, third=%f\n", first, second, third);
139 
140  float64_t mmd_u=first+second-third;
141 
142  /* return m*MMD_u when m=n, (m+n)*MMD_u in general case */
143  if (m==n)
144  return m*mmd_u;
145 
146  return (m+n)*mmd_u;
147 }
148 
/* Computes the biased quadratic-time MMD estimate MMD_b (diagonals included,
 * normalization by m*m and n*n rather than m*(m-1), n*(n-1)); returns
 * m*MMD_b when m==n and (m+n)*MMD_b otherwise. NOTE(review): the signature
 * line was lost in extraction — presumably
 * float64_t CQuadraticTimeMMD::compute_biased_statistic(). */
150 {
151  /* split computations into three terms from JLMR paper (see documentation )*/
152  index_t m=m_m;
153  index_t n=0;
154 
155  /* check if kernel is precomputed (custom kernel) */
/* NOTE(review): the if-condition line was lost in extraction; only its body
 * (the assignment below) survived. */
157  n=m_kernel->get_num_vec_lhs()-m;
158  else
159  {
160  REQUIRE(m_p_and_q, "The samples are not initialized!\n");
161  n=m_p_and_q->get_num_vectors()-m;
162  }
163 
164  SG_DEBUG("Computing MMD_b with %d samples from p and %d samples from q\n",
165  m, n);
166 
167  /* init kernel with features */
/* NOTE(review): the kernel-initialization statement itself was lost in
 * extraction. */
169 
170  /* first term: mean kernel value over all within-p pairs (diagonal kept) */
171  float64_t first=0;
172  for (index_t i=0; i<m; ++i)
173  {
174  for (index_t j=0; j<m; ++j)
175  first+=m_kernel->kernel(i,j);
176  }
177  first/=m*m;
178 
179  /* second term: mean over all within-q pairs */
180  float64_t second=0;
181  for (index_t i=m; i<m+n; ++i)
182  {
183  for (index_t j=m; j<m+n; ++j)
184  second+=m_kernel->kernel(i,j);
185  }
186  second/=n*n;
187 
188  /* third term: cross pairs between p and q, scaled by 2/(m*n) */
189  float64_t third=0;
190  for (index_t i=0; i<m; ++i)
191  {
192  for (index_t j=m; j<m+n; ++j)
193  third+=m_kernel->kernel(i,j);
194  }
195  third*=2.0/m/n;
196 
197  SG_DEBUG("first=%f, second=%f, third=%f\n", first, second, third);
198 
199  float64_t mmd_b=first+second-third;
200 
201  /* return m*MMD_b when m=n, (m+n)*MMD_b in general case */
202  if (m==n)
203  return m*mmd_b;
204 
205  return (m+n)*mmd_b;
206 }
207 
/* Dispatches to the biased or unbiased statistic according to
 * m_statistic_type; errors out if no kernel is set or the type is unknown.
 * NOTE(review): the signature line was lost in extraction — presumably
 * float64_t CQuadraticTimeMMD::compute_statistic(). */
209 {
210  if (!m_kernel)
211  SG_ERROR("No kernel specified!\n")
212 
213  float64_t result=0;
214  switch (m_statistic_type)
215  {
216  case UNBIASED:
/* NOTE(review): the assignment for this case was lost in extraction —
 * presumably result=compute_unbiased_statistic(); confirm upstream. */
218  break;
219  case BIASED:
220  result=compute_biased_statistic();
221  break;
222  default:
223  SG_ERROR("Unknown statistic type!\n");
224  break;
225  }
226 
227  return result;
228 }
229 
/* Computes a p-value for a given test statistic, dispatching on the
 * null-approximation method: spectrum sampling (Eigen3 only), gamma
 * moment-matching, or the base-class (permutation) fallback. NOTE(review):
 * the signature line and the switch head were lost in extraction —
 * presumably float64_t CQuadraticTimeMMD::compute_p_value(float64_t
 * statistic) switching on the null-approximation member. */
231 {
232  float64_t result=0;
233 
235  {
236  case MMD2_SPECTRUM:
237  {
238 #ifdef HAVE_EIGEN3
239  /* get samples from null-distribution and compute p-value of statistic */
/* NOTE(review): the statement obtaining null_samples (a call to the
 * spectrum sampler) was lost in extraction. The p-value is then one minus
 * the empirical CDF of the sorted samples at the statistic. */
242  null_samples.qsort();
243  index_t pos=null_samples.find_position_to_insert(statistic);
244  result=1.0-((float64_t)pos)/null_samples.vlen;
245 #else // HAVE_EIGEN3
246  SG_ERROR("Only possible if shogun is compiled with EIGEN3 enabled\n");
247 #endif // HAVE_EIGEN3
248  break;
249  }
250 
251  case MMD2_GAMMA:
252  {
253  /* fit gamma and return cdf at statistic */
/* NOTE(review): the statement obtaining params (the fitted gamma shape and
 * scale) was lost in extraction — presumably a call to fit_null_gamma(). */
255  result=CStatistics::gamma_cdf(statistic, params[0], params[1]);
256  break;
257  }
258 
259  default:
260  result=CKernelTwoSampleTest::compute_p_value(statistic);
261  break;
262  }
263 
264  return result;
265 }
266 
/* Computes the MMD statistic either once (multiple_kernels==false) or once
 * per subkernel of the underlying combined kernel, temporarily swapping each
 * subkernel into m_kernel. NOTE(review): the first signature line was lost
 * in extraction — presumably SGVector<float64_t>
 * CQuadraticTimeMMD::compute_statistic(bool multiple_kernels). */
268  bool multiple_kernels)
269 {
270  SGVector<float64_t> mmds;
271  if (!multiple_kernels)
272  {
273  /* just one mmd result */
274  mmds=SGVector<float64_t>(1);
275  mmds[0]=compute_statistic();
276  }
277  else
278  {
/* NOTE(review): the REQUIRE head checking that m_kernel is of combined type
 * was lost in extraction; only its message strings survived. */
280  "multiple kernels specified, but underlying kernel is not of type "
281  "K_COMBINED\n");
282 
283  /* cast and allocate memory for results */
/* NOTE(review): the line declaring 'combined' (a cast of m_kernel to the
 * combined-kernel type) was lost in extraction. */
285  SG_REF(combined);
286  mmds=SGVector<float64_t>(combined->get_num_subkernels());
287 
288  /* iterate through all kernels and compute statistic */
289  /* TODO this might be done in parallel */
290  for (index_t i=0; i<mmds.vlen; ++i)
291  {
292  CKernel* current=combined->get_kernel(i);
293  /* temporarily replace underlying kernel and compute statistic */
294  m_kernel=current;
295  mmds[i]=compute_statistic();
296 
297  SG_UNREF(current);
298  }
299 
300  /* restore combined kernel */
301  m_kernel=combined;
302  SG_UNREF(combined);
303  }
304 
305  return mmds;
306 }
307 
/* Computes the rejection threshold for significance level alpha, dispatching
 * on the null-approximation method: the empirical (1-alpha) quantile of
 * spectrum null samples (Eigen3 only), the inverse gamma CDF, or the
 * base-class (sampling) fallback. NOTE(review): the signature line and the
 * switch head were lost in extraction — presumably float64_t
 * CQuadraticTimeMMD::compute_threshold(float64_t alpha). */
309 {
310  float64_t result=0;
311 
313  {
314  case MMD2_SPECTRUM:
315  {
316 #ifdef HAVE_EIGEN3
317  /* get samples from null-distribution and compute threshold */
/* NOTE(review): the statement obtaining null_samples was lost in
 * extraction — presumably a call to the spectrum sampler. */
320  null_samples.qsort();
321  result=null_samples[index_t(CMath::floor(null_samples.vlen*(1-alpha)))];
322 #else // HAVE_EIGEN3
323  SG_ERROR("Only possible if shogun is compiled with EIGEN3 enabled\n");
324 #endif // HAVE_EIGEN3
325  break;
326  }
327 
328  case MMD2_GAMMA:
329  {
330  /* fit gamma and return inverse cdf at alpha */
/* NOTE(review): the statement obtaining params was lost in extraction —
 * presumably a call to fit_null_gamma(). */
332  result=CStatistics::inverse_gamma_cdf(alpha, params[0], params[1]);
333  break;
334  }
335 
336  default:
337  /* sampling null is handled here */
/* NOTE(review): the delegating statement for the default case was lost in
 * extraction — presumably the base-class compute_threshold(alpha). */
339  break;
340  }
341 
342  return result;
343 }
344 
345 
346 #ifdef HAVE_EIGEN3
/* Draws num_samples samples from the null distribution of the statistic via
 * the spectrum approximation: eigendecompose the centered joint kernel
 * matrix, then sum the num_eigenvalues largest eigenvalues weighted by
 * squared Gaussian draws. Returns the samples (halved when m==n so the
 * scaling matches m*MMD^2). NOTE(review): the first signature line was lost
 * in extraction — presumably SGVector<float64_t>
 * CQuadraticTimeMMD::sample_null_spectrum(index_t num_samples, ...). */
348  index_t num_eigenvalues)
349 {
350  REQUIRE(m_kernel, "(%d, %d): No kernel set!\n", num_samples,
351  num_eigenvalues);
/* NOTE(review): the head of this second REQUIRE (its checked condition) was
 * lost in extraction; only the message arguments survived. */
353  "(%d, %d): No features set and no custom kernel in use!\n",
354  num_samples, num_eigenvalues);
355 
356  index_t m=m_m;
357  index_t n=0;
358 
359  /* check if kernel is precomputed (custom kernel) */
/* NOTE(review): the if-condition line was lost in extraction; only its body
 * (the assignment below) survived. */
361  n=m_kernel->get_num_vec_lhs()-m;
362  else
363  {
364  REQUIRE(m_p_and_q, "The samples are not initialized!\n");
365  n=m_p_and_q->get_num_vectors()-m;
366  }
367 
368  if (num_samples<=2)
369  {
370  SG_ERROR("Number of samples has to be at least 2, "
371  "better in the hundreds");
372  }
373 
374  if (num_eigenvalues>m+n-1)
375  SG_ERROR("Number of Eigenvalues too large\n");
376 
377  if (num_eigenvalues<1)
378  SG_ERROR("Number of Eigenvalues too small\n");
379 
380  /* imaginary matrix K=[K KL; KL' L] (MATLAB notation)
381  * K is matrix for XX, L is matrix for YY, KL is XY, LK is YX
382  * works since X and Y are concatenated here */
/* NOTE(review): the statements fetching the joint kernel matrix into K were
 * lost in extraction. */
385 
386  /* center matrix K=H*K*H */
387  K.center();
388 
389  /* compute eigenvalues and select num_eigenvalues largest ones */
390  Map<MatrixXd> c_kernel_matrix(K.matrix, K.num_rows, K.num_cols);
391  SelfAdjointEigenSolver<MatrixXd> eigen_solver(c_kernel_matrix);
392  REQUIRE(eigen_solver.info()==Eigen::Success,
393  "Eigendecomposition failed!\n");
394  index_t max_num_eigenvalues=eigen_solver.eigenvalues().rows();
395 
396  /* precomputing terms with rho_x and rho_y of equation 10 in [1]
397  * (see documentation) */
398  float64_t rho_x=float64_t(m)/(m+n);
399  float64_t rho_y=1-rho_x;
400 
401  /* instead of using two Gaussian rv's ~ N(0,1), we'll use just one rv
402  * ~ N(0, 1/rho_x+1/rho_y) (derived from eq 10 in [1]) */
403  float64_t std_dev=CMath::sqrt(1/rho_x+1/rho_y);
404  float64_t inv_rho_x_y=1/(rho_x*rho_y);
405 
406  SG_DEBUG("Using Gaussian samples ~ N(0,%f)\n", std_dev*std_dev);
407 
408  /* finally, sample from null distribution */
409  SGVector<float64_t> null_samples(num_samples);
410  for (index_t i=0; i<num_samples; ++i)
411  {
412  null_samples[i]=0;
413  for (index_t j=0; j<num_eigenvalues; ++j)
414  {
415  /* compute the right hand multiple of eq. 10 in [1] using one RV.
416  * randn_double() gives a sample from N(0,1), we need samples
417  * from N(0,1/rho_x+1/rho_y) */
418  float64_t z_j=std_dev*CMath::randn_double();
419 
420  SG_DEBUG("z_j=%f\n", z_j);
421 
422  float64_t multiple=CMath::pow(z_j, 2);
423 
424  /* take largest EV, scale by 1/(m+n) on the fly and take abs value*/
425  float64_t eigenvalue_estimate=CMath::abs(1.0/(m+n)
426  *eigen_solver.eigenvalues()[max_num_eigenvalues-1-j]);
427 
/* NOTE(review): a line preceding this subtraction (original line 428,
 * possibly a condition guarding it) was lost in extraction — confirm
 * upstream before relying on the unconditional subtraction shown here. */
429  multiple-=inv_rho_x_y;
430 
431  SG_DEBUG("multiple=%f, eigenvalue=%f\n", multiple,
432  eigenvalue_estimate);
433 
434  null_samples[i]+=eigenvalue_estimate*multiple;
435  }
436  }
437 
438  /* when m=n, return m*MMD^2 instead */
439  if (m==n)
440  null_samples.scale(0.5);
441 
442  return null_samples;
443 }
444 #endif // HAVE_EIGEN3
445 
/* Fits a two-parameter gamma distribution to the null distribution of the
 * biased MMD statistic via moment matching (mean and variance under H0),
 * following the MATLAB formulas quoted in the comments below. Returns a
 * length-2 vector [shape a, scale b]. Only supports equal sample sizes
 * (m_m == num_data/2). NOTE(review): the signature line was lost in
 * extraction — presumably SGVector<float64_t>
 * CQuadraticTimeMMD::fit_null_gamma(). */
447 {
448  REQUIRE(m_kernel, "No kernel set!\n");
/* NOTE(review): the head of this second REQUIRE (its checked condition) was
 * lost in extraction; only the message survived. */
450  "No features set and no custom kernel in use!\n");
451 
452  index_t num_data;
/* NOTE(review): the if-condition line (custom-kernel check) was lost in
 * extraction; only the two branch bodies survived. */
454  num_data=m_kernel->get_num_vec_rhs();
455  else
456  num_data=m_p_and_q->get_num_vectors();
457 
458  if (m_m!=num_data/2)
459  SG_ERROR("Currently, only equal sample sizes are supported\n");
460 
461  /* evtl. warn user not to use wrong statistic type */
/* NOTE(review): the if-condition guarding this warning (presumably a check
 * that m_statistic_type is not BIASED) was lost in extraction. */
463  {
464  SG_WARNING("Note: provided statistic has "
465  "to be BIASED. Please ensure that! To get rid of warning,"
466  "call %s::set_statistic_type(BIASED)\n", get_name());
467  }
468 
469  /* imaginary matrix K=[K KL; KL' L] (MATLAB notation)
470  * K is matrix for XX, L is matrix for YY, KL is XY, LK is YX
471  * works since X and Y are concatenated here */
/* NOTE(review): the kernel-initialization statement was lost in
 * extraction. */
473 
474  /* compute mean under H0 of MMD, which is
475  * meanMMD = 2/m * ( 1 - 1/m*sum(diag(KL)) );
476  * in MATLAB.
477  * Remove diagonals on the fly */
478  float64_t mean_mmd=0;
479  for (index_t i=0; i<m_m; ++i)
480  {
481  /* virtual KL matrix is in upper right corner of SHOGUN K matrix
482  * so this sums the diagonal of the matrix between X and Y*/
483  mean_mmd+=m_kernel->kernel(i, m_m+i);
484  }
485  mean_mmd=2.0/m_m*(1.0-1.0/m_m*mean_mmd);
486 
487  /* compute variance under H0 of MMD, which is
488  * varMMD = 2/m/(m-1) * 1/m/(m-1) * sum(sum( (K + L - KL - KL').^2 ));
489  * in MATLAB, so sum up all elements */
490  float64_t var_mmd=0;
491  for (index_t i=0; i<m_m; ++i)
492  {
493  for (index_t j=0; j<m_m; ++j)
494  {
495  /* dont add diagonal of all pairs of imaginary kernel matrices */
496  if (i==j || m_m+i==j || m_m+j==i)
497  continue;
498 
499  float64_t to_add=m_kernel->kernel(i, j);
500  to_add+=m_kernel->kernel(m_m+i, m_m+j);
501  to_add-=m_kernel->kernel(i, m_m+j);
502  to_add-=m_kernel->kernel(m_m+i, j);
503  var_mmd+=CMath::pow(to_add, 2);
504  }
505  }
506  var_mmd*=2.0/m_m/(m_m-1)*1.0/m_m/(m_m-1);
507 
508  /* parameters for gamma distribution: shape a = mean^2/var,
509  * scale b = var*m/mean (moment matching) */
509  float64_t a=CMath::pow(mean_mmd, 2)/var_mmd;
510  float64_t b=var_mmd*m_m / mean_mmd;
511 
512  SGVector<float64_t> result(2);
513  result[0]=a;
514  result[1]=b;
515 
516  return result;
517 }
518 
/* Setter for the number of null-distribution samples drawn by the spectrum
 * approximation. NOTE(review): the first signature line was lost in
 * extraction — presumably
 * void CQuadraticTimeMMD::set_num_samples_spectrum(index_t ...). */
520  num_samples_spectrum)
521 {
522  m_num_samples_spectrum=num_samples_spectrum;
523 }
524 
/* Setter for the number of eigenvalues used by the spectrum approximation.
 * NOTE(review): the first signature line was lost in extraction —
 * presumably void CQuadraticTimeMMD::set_num_eigenvalues_spectrum(...). */
526  index_t num_eigenvalues_spectrum)
527 {
528  m_num_eigenvalues_spectrum=num_eigenvalues_spectrum;
529 }
530 
/* Setter for the statistic variant (BIASED or UNBIASED) used by
 * compute_statistic(). NOTE(review): the first signature line was lost in
 * extraction — presumably
 * void CQuadraticTimeMMD::set_statistic_type(EQuadraticMMDType ...). */
532  statistic_type)
533 {
534  m_statistic_type=statistic_type;
535 }
536 

SHOGUN Machine Learning Toolbox - Documentation