SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
TwoSampleTest.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2012-2013 Heiko Strathmann
4  * Written (w) 2014 Soumyajit De
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  */
31 
35 
36 using namespace shogun;
37 
/* Presumably the default constructor (its signature line is not shown in this
 * listing -- TODO confirm). The body only delegates to init(). */
39 {
40  init();
41 }
42 
/* Presumably the constructor taking already merged samples p_and_q and the
 * split index m (signature line not shown in this listing -- TODO confirm).
 * Runs init() first and then stores both arguments in the members.
 * NOTE(review): listing line 49 is missing here; check the real file for what
 * happens between the two assignments. */
45 {
46  init();
47 
48  m_p_and_q=p_and_q;
50 
51  m_m=m;
52 }
53 
/* Presumably the constructor taking separate samples p and q (signature line
 * not shown in this listing -- TODO confirm). After init(), m_m is set to the
 * number of vectors in p.
 * NOTE(review): listing lines 59-60 are missing here; presumably they build
 * m_p_and_q from p and q -- verify against the real file. */
56 {
57  init();
58 
61 
62  m_m=p->get_num_vectors();
63 }
64 
/* NOTE(review): both the signature (listing line 65) and the only statement
 * (listing line 67) of this definition are missing from this listing;
 * presumably this is the destructor -- confirm against the real file. */
66 {
68 }
69 
/* Shared initialisation helper called from the other bodies in this file.
 * Registers the members m_p_and_q and m_m via SG_ADD under the names
 * "p_and_q" and "m", then resets them to NULL and 0 respectively.
 * NOTE(review): the trailing arguments of both SG_ADD calls (listing lines 73
 * and 75) are not shown in this listing. */
70 void CTwoSampleTest::init()
71 {
72  SG_ADD((CSGObject**)&m_p_and_q, "p_and_q", "Concatenated samples p and q",
74  SG_ADD(&m_m, "m", "Index of first sample of q",
76 
 /* start with no samples attached and a split index of zero */
77  m_p_and_q=NULL;
78  m_m=0;
79 }
80 
/* Presumably the body of sample_null() (signature line not shown in this
 * listing -- TODO confirm). Collects m_num_null_samples values of
 * compute_statistic(), each evaluated after re-permuting the index vector over
 * the merged samples and adding it as a subset, and returns them in results.
 * Fails via REQUIRE when m_p_and_q is not set.
 * NOTE(review): the declaration of results (listing line 88) and the statement
 * following compute_statistic() inside the loop (listing line 107) are not
 * visible here; presumably the latter removes the subset again -- confirm. */
82 {
83  SG_DEBUG("entering!\n")
84 
85  REQUIRE(m_p_and_q, "No appended features p and q!\n");
86 
87  /* compute sample statistics for null distribution */
89 
90  /* memory for index permutations. Adding of subset has to happen
91  * inside the loop since it may be copied if there already is one set */
92  SGVector<index_t> ind_permutation(m_p_and_q->get_num_vectors());
 /* one index per merged sample; range_fill() presumably yields 0,1,2,... */
93  ind_permutation.range_fill();
94 
95  for (index_t i=0; i<m_num_null_samples; ++i)
96  {
97  /* idea: merge features of p and q, shuffle, and compute statistic.
98  * This is done using subsets here */
99 
100  /* create index permutation and add as subset. This will mix samples
101  * from p and q */
 /* the same vector is permuted again on every iteration */
102  CMath::permute(ind_permutation);
103 
104  /* compute statistic for this permutation of mixed samples */
105  m_p_and_q->add_subset(ind_permutation);
106  results[i]=compute_statistic();
108  }
109 
110  SG_DEBUG("leaving!\n")
111  return results;
112 }
113 
/* Presumably the body of compute_p_value(float64_t statistic) (signature line
 * not shown in this listing -- TODO confirm). Sorts the null samples, finds
 * the position at which statistic would be inserted, and returns
 * 1 - position/number_of_samples. Any other approximation method ends in
 * SG_ERROR.
 * NOTE(review): the if-condition (listing line 118) and the declaration of
 * values (listing line 121) are not visible here. */
115 {
116  float64_t result=0;
117 
119  {
120  /* sample a bunch of MMD values from null distribution */
122 
123  /* find out percentile of parameter "statistic" in null distribution */
124  CMath::qsort(values);
 /* i is held as float64_t, so i/values.vlen below is a floating-point
  * division rather than an integer one */
125  float64_t i=values.find_position_to_insert(statistic);
126 
127  /* return corresponding p-value */
128  result=1.0-i/values.vlen;
129  }
130  else
131  SG_ERROR("Unknown method to approximate null distribution!\n");
132 
133  return result;
134 }
135 
/* Presumably the body of compute_threshold(float64_t alpha) (signature line
 * not shown in this listing -- TODO confirm). Returns the element of values at
 * index floor(values.vlen*(1-alpha)); any other approximation method ends in
 * SG_ERROR.
 * NOTE(review): the if-condition (listing line 140) and the declaration of
 * values (listing line 143) are not visible here. No sort of values is visible
 * in this block either; reading a quantile by index needs ascending order --
 * confirm that the samples are sorted before the lookup. */
137 {
138  float64_t result=0;
139 
141  {
142  /* sample a bunch of MMD values from null distribution */
144 
145  /* return value of (1-alpha) quantile */
 /* NOTE(review): for alpha==0 the index evaluates to values.vlen, which is
  * one past the last element if values holds vlen entries -- confirm that
  * callers never pass alpha<=0 */
146  result=values[index_t(CMath::floor(values.vlen*(1-alpha)))];
147  }
148  else
149  SG_ERROR("Unknown method to approximate null distribution!\n");
150 
151  return result;
152 }
153 
/* Presumably the body of set_p_and_q(CFeatures* p_and_q) (signature line not
 * shown in this listing -- TODO confirm). Takes a reference on the new
 * feature object and stores it in m_p_and_q.
 * NOTE(review): listing line 158 is missing here; going by the existing
 * comment it presumably releases the previously stored object -- confirm. */
155 {
156  /* ref before unref to avoid problems when instances are equal */
157  SG_REF(p_and_q);
159  m_p_and_q=p_and_q;
160 }
161 
/* Setter for the split index m_m (signature line not shown in this listing;
 * the parameter is named m). Requires that samples are attached and that
 * m is strictly smaller than the total number of vectors in m_p_and_q. */
163 {
164  REQUIRE(m_p_and_q, "Samples are not specified!\n");
 /* NOTE(review): the check also rejects m equal to the total number of
  * vectors, while the message only says "greater than"; in addition the two
  * adjacent literals join without a space before "(%d)" */
165  REQUIRE(m_p_and_q->get_num_vectors()>m, "Provided sample size for p"
166  "(%d) is greater than total number of samples (%d)!\n",
167  m, m_p_and_q->get_num_vectors());
168  m_m=m;
169 }
170 
/* Presumably the body of get_p_and_q() (signature line not shown in this
 * listing -- TODO confirm). Returns m_p_and_q after taking an additional
 * reference on it with SG_REF; presumably the caller has to release that
 * reference again -- verify against the header documentation. */
172 {
173  SG_REF(m_p_and_q);
174  return m_p_and_q;
175 }
176 
void range_fill(T start=0)
Definition: SGVector.cpp:171
static void permute(SGVector< T > v, CRandom *rand=NULL)
Definition: Math.h:1144
virtual float64_t compute_threshold(float64_t alpha)
virtual float64_t compute_p_value(float64_t statistic)
int32_t index_t
Definition: common.h:62
virtual SGVector< float64_t > sample_null()
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:129
#define REQUIRE(x,...)
Definition: SGIO.h:206
#define SG_REF(x)
Definition: SGObject.h:54
static float64_t floor(float64_t d)
Definition: Math.h:407
static void qsort(T *output, int32_t size)
Definition: Math.h:1313
Hypothesis test base class. Provides an interface for statistical hypothesis testing via three method...
virtual CFeatures * create_merged_copy(CList *others)
Definition: Features.h:235
index_t vlen
Definition: SGVector.h:494
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
double float64_t
Definition: common.h:50
virtual void set_p_and_q(CFeatures *p_and_q)
index_t find_position_to_insert(T element)
Definition: SGVector.cpp:187
#define SG_UNREF(x)
Definition: SGObject.h:55
#define SG_DEBUG(...)
Definition: SGIO.h:107
All classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual void remove_subset()
Definition: Features.cpp:322
The class Features is the base class of all feature objects.
Definition: Features.h:68
ENullApproximationMethod m_null_approximation_method
#define SG_ADD(...)
Definition: SGObject.h:84
virtual CFeatures * get_p_and_q()
virtual float64_t compute_statistic()=0
virtual void add_subset(SGVector< index_t > subset)
Definition: Features.cpp:310

SHOGUN Machine Learning Toolbox - Documentation