SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
LDA.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 2014 Abhijeet Kislay
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 #include <shogun/lib/config.h>
12 
13 #include <shogun/lib/common.h>
14 #include <shogun/machine/Machine.h>
16 #include <shogun/classifier/LDA.h>
17 #include <shogun/labels/Labels.h>
21 
22 using namespace Eigen;
23 using namespace shogun;
24 
25 CLDA::CLDA(float64_t gamma, ELDAMethod method)
27 {
28  init();
29  m_method=method;
30  m_gamma=gamma;
31 }
32 
34  CLabels *trainlab, ELDAMethod method)
35  :CLinearMachine(), m_gamma(gamma)
36 {
37  init();
38  set_features(traindat);
39  set_labels(trainlab);
40  m_method=method;
41  m_gamma=gamma;
42 }
43 
44 void CLDA::init()
45 {
47  m_gamma=0;
48  SG_ADD((machine_int_t*) &m_method, "m_method",
49  "Method used for LDA calculation", MS_NOT_AVAILABLE);
50  SG_ADD((machine_int_t*) &m_gamma, "m_gamma",
51  "Regularization parameter", MS_NOT_AVAILABLE);
52 }
53 
55 {
56 }
57 
59 {
60  REQUIRE(m_labels, "Labels for the given features are not specified!\n")
61  REQUIRE(m_labels->get_label_type()==LT_BINARY, "The labels should of type"
62  " CBinaryLabels! you provided %s \n",m_labels->get_name())
63 
64  if(data)
65  {
66  if(!data->has_property(FP_DOT))
67  SG_ERROR("Specified features are not of type CDotFeatures\n")
68  set_features((CDotFeatures*) data);
69  }
70  else
71  {
72  data = get_features();
73  REQUIRE(data, "Features have not been provided.\n")
74  }
75 
76  SGVector<int32_t>train_labels=((CBinaryLabels *)m_labels)->get_int_labels();
77  REQUIRE(train_labels.vector,"Provided Labels are empty!\n")
78 
79  REQUIRE(data->get_num_vectors() == train_labels.vlen,"Number of training examples(%d) should be "
80  "equal to number of labels (%d)!\n", data->get_num_vectors(), train_labels.vlen);
81 
82  if(data->get_feature_type() == F_SHORTREAL)
83  return CLDA::train_machine_templated<float32_t>(train_labels, data);
84  else if(data->get_feature_type() == F_DREAL)
85  return CLDA::train_machine_templated<float64_t>(train_labels, data);
86  else if(data->get_feature_type() == F_LONGREAL)
87  return CLDA::train_machine_templated<floatmax_t>(train_labels, data);
88 
89  return false;
90 }
91 
92 template <typename ST>
94 {
95  SGMatrix<ST>feature_matrix=((CDenseFeatures<ST>*)features)
96  ->get_feature_matrix();
97  int32_t num_feat=feature_matrix.num_rows;
98  int32_t num_vec=feature_matrix.num_cols;
99 
100  SGVector<int32_t> classidx_neg(num_vec);
101  SGVector<int32_t> classidx_pos(num_vec);
102 
103  int32_t i=0;
104  int32_t num_neg=0;
105  int32_t num_pos=0;
106 
107  for(i=0; i<train_labels.vlen; i++)
108  {
109  if (train_labels.vector[i]==-1)
110  classidx_neg[num_neg++]=i;
111 
112  else if(train_labels.vector[i]==+1)
113  classidx_pos[num_pos++]=i;
114  }
115 
116  SGVector<ST> w_st(num_feat);
117  w_st.zero();
118  typename SGMatrix<ST>::EigenMatrixXt fmatrix=typename SGMatrix<ST>::EigenMatrixXtMap(feature_matrix.matrix, num_feat, num_vec);
119  typename SGVector<ST>::EigenVectorXt mean_neg(num_feat);
120  mean_neg.setZero();
121  typename SGVector<ST>::EigenVectorXt mean_pos(num_feat);
122  mean_pos.setZero();
123 
124  //mean neg
125  for(i=0; i<num_neg; i++)
126  mean_neg+=fmatrix.col(classidx_neg[i]);
127  mean_neg/=(ST)num_neg;
128 
129  // get m(-ve) - mean(-ve)
130  for(i=0; i<num_neg; i++)
131  fmatrix.col(classidx_neg[i])-=mean_neg;
132 
133  //mean pos
134  for(i=0; i<num_pos; i++)
135  mean_pos+=fmatrix.col(classidx_pos[i]);
136  mean_pos/=(ST)num_pos;
137 
138  // get m(+ve) - mean(+ve)
139  for(i=0; i<num_pos; i++)
140  fmatrix.col(classidx_pos[i])-=mean_pos;
141 
142  SGMatrix<ST>scatter_matrix(num_feat, num_feat);
143  typename SGMatrix<ST>::EigenMatrixXtMap scatter(scatter_matrix.matrix, num_feat, num_feat);
144 
145  if (m_method == FLD_LDA || (m_method==AUTO_LDA && num_vec>num_feat))
146  {
147  // covariance matrix.
148  typename SGMatrix<ST>::EigenMatrixXt cov_mat(num_feat, num_feat);
149  cov_mat=fmatrix*fmatrix.transpose();
150  scatter=cov_mat/(num_vec-1);
151  ST trace=scatter.trace();
152  ST s=1.0-((ST) m_gamma);
153  scatter*=s;
154  scatter.diagonal()+=Eigen::DenseBase<typename SGVector<ST>::EigenVectorXt>::Constant(num_feat, trace*((ST)m_gamma)/num_feat);
155 
156  // the usual way
157  // we need to find a Basic Linear Solution of A.x=b for 'x'.
158  // Instead of crudely Inverting A, we go for solve() using Decompositions.
159  // where:
160  // MatrixXd A=scatter;
161  // VectorXd b=mean_pos-mean_neg;
162  // VectorXd x=w;
163  typename SGVector<ST>::EigenVectorXtMap x(w_st.vector, num_feat);
164  LLT<typename SGMatrix<ST>::EigenMatrixXt> decomposition(scatter);
165  x=decomposition.solve(mean_pos-mean_neg);
166 
167  // get the weights w_neg(for -ve class) and w_pos(for +ve class)
168  typename SGVector<ST>::EigenVectorXt w_neg=decomposition.solve(mean_neg);
169  typename SGVector<ST>::EigenVectorXt w_pos=decomposition.solve(mean_pos);
170 
171  // get the bias.
172  bias=((float64_t)(0.5*(w_neg.dot(mean_neg)-w_pos.dot(mean_pos))));
173  }
174  else
175  {
176  //for algorithmic detail, please refer to section 16.3.1. of Bayesian
177  //Reasoning and Machine Learning by David Barber.
178 
179  //we will perform SVD here.
180  typename SGMatrix<ST>::EigenMatrixXtMap fmatrix1(feature_matrix.matrix, num_feat, num_vec);
181 
182  // to hold the centered positive and negative class data
183  typename SGMatrix<ST>::EigenMatrixXt cen_pos(num_feat,num_pos);
184  typename SGMatrix<ST>::EigenMatrixXt cen_neg(num_feat,num_neg);
185 
186  for(i=0; i<num_pos;i++)
187  cen_pos.col(i)=fmatrix.col(classidx_pos[i]);
188 
189  for(i=0; i<num_neg;i++)
190  cen_neg.col(i)=fmatrix.col(classidx_neg[i]);
191 
192  //+ve covariance matrix
193 #if EIGEN_WITH_OPERATOR_BUG
194  cen_pos=cen_pos*cen_pos.transpose();
195  cen_pos/=(ST)(num_pos-1);
196 #else
197  cen_pos=cen_pos*cen_pos.transpose()/((ST)(num_pos-1));
198 #endif
199 
200  //-ve covariance matrix
201 #if EIGEN_WITH_OPERATOR_BUG
202  cen_neg=cen_neg*cen_neg.transpose();
203  cen_neg/=(ST)(num_neg-1);
204 #else
205  cen_neg=cen_neg*cen_neg.transpose()/((ST)(num_neg-1));
206 #endif
207 
208  //within class matrix
209  typename SGMatrix<ST>::EigenMatrixXt Sw= num_pos*cen_pos+num_neg*cen_neg;
210  ST trace=Sw.trace();
211  ST s=1.0-((ST)m_gamma);
212  Sw *=s;
213  Sw.diagonal()+=Eigen::DenseBase<typename SGVector<ST>::EigenVectorXt>::Constant(num_feat, trace*((ST)m_gamma)/num_feat);
214 
215  //total mean
216  typename SGVector<ST>::EigenVectorXt mean_total=(num_pos*mean_pos+num_neg*mean_neg)/(ST)num_vec;
217 
218  //between class matrix
219  typename SGMatrix<ST>::EigenMatrixXt Sb(num_feat,2);
220  Sb.col(0)=sqrt(num_pos)*(mean_pos-mean_total);
221  Sb.col(1)=sqrt(num_neg)*(mean_neg-mean_total);
222 
223  JacobiSVD<typename SGMatrix<ST>::EigenMatrixXt> svd(fmatrix1, ComputeThinU);
224 
225  // basis to represent the solution
226  typename SGMatrix<ST>::EigenMatrixXt Q=svd.matrixU();
227  // modified between class scatter
228  Sb=Q.transpose()*(Sb*(Sb.transpose()))*Q;
229 
230  // modified within class scatter
231  Sw=Q.transpose()*Sw*Q;
232 
233  // to find SVD((inverse(Chol(Sw)))' * Sb * (inverse(Chol(Sw))))
234  //1.get Cw=Chol(Sw)
235  //find the decomposition of Cw'.
236  HouseholderQR<typename SGMatrix<ST>::EigenMatrixXt> decomposition(Sw.llt().matrixU().transpose());
237 
238  //2.get P=inv(Cw')*Sb_new
239  //MatrixXd P=decomposition.solve(Sb);
240  //3. final value to be put in SVD will be therefore:
241  // final_ output =(inv(Cw')*(P'))'
242  JacobiSVD<typename SGMatrix<ST>::EigenMatrixXt> svd2(decomposition.solve((decomposition.solve(Sb))
243  .transpose()).transpose(), ComputeThinU);
244 
245  // Since this is a linear classifier, with only binary classes,
246  // we need to keep only the 1st eigenvector.
248  x=Q*(svd2.matrixU().col(0));
249  // get the bias
250  bias=((float64_t)(x.transpose()*mean_total));
251  bias=bias*(-1);
252  }
253 
254  SGVector<float64_t> w(num_feat);
255  w.zero();
256 
257  //copy w_st into w
258  for(i = 0; i < w.size(); ++i)
259  w[i] = (float64_t) w_st[i];
260 
261  set_w(w);
262 
263  return true;
264 }
virtual const char * get_name() const =0
Eigen::Map< EigenMatrixXt, 0, Eigen::Stride< 0, 0 > > EigenMatrixXtMap
Definition: SGMatrix.h:44
virtual bool train_machine(CFeatures *data=NULL)
Definition: LDA.cpp:58
bool train_machine_templated(SGVector< int32_t > train_labels, CFeatures *features)
Definition: LDA.cpp:93
virtual ELabelType get_label_type() const =0
binary labels +1/-1
Definition: LabelTypes.h:18
virtual void set_w(const SGVector< float64_t > src_w)
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
Definition: SGMatrix.h:24
virtual int32_t get_num_vectors() const =0
CLabels * m_labels
Definition: Machine.h:365
#define SG_ERROR(...)
Definition: SGIO.h:128
#define REQUIRE(x,...)
Definition: SGIO.h:205
index_t num_cols
Definition: SGMatrix.h:465
virtual void set_features(CDotFeatures *feat)
Definition: LDA.h:143
virtual CDotFeatures * get_features()
Features that support dot products among other operations.
Definition: DotFeatures.h:44
index_t num_rows
Definition: SGMatrix.h:463
int32_t size() const
Definition: SGVector.h:136
index_t vlen
Definition: SGVector.h:545
CLDA(float64_t gamma=0, ELDAMethod method=AUTO_LDA)
Definition: LDA.cpp:25
double float64_t
Definition: common.h:60
float64_t m_gamma
Definition: LDA.h:180
Class LinearMachine is a generic interface for all kinds of linear machines like classifiers.
Definition: LinearMachine.h:63
static float64_t trace(float64_t *mat, int32_t cols, int32_t rows)
Definition: SGMatrix.cpp:391
ELDAMethod
Definition: LDA.h:26
CDotFeatures * features
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual ~CLDA()
Definition: LDA.cpp:54
int machine_int_t
Definition: common.h:69
The class Features is the base class of all feature objects.
Definition: Features.h:68
void init()
Definition: LDA.cpp:44
ELDAMethod m_method
Definition: LDA.h:182
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
#define SG_ADD(...)
Definition: SGObject.h:94
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
virtual void set_labels(CLabels *lab)
Definition: Machine.cpp:65
virtual EFeatureType get_feature_type() const =0

SHOGUN Machine Learning Toolbox - Documentation