SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SGDQN.cpp
Go to the documentation of this file.
1 /*
2  SVM with Quasi-Newton stochastic gradient
3  Copyright (C) 2009- Antoine Bordes
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License as published by the Free Software Foundation; either
8  version 2.1 of the License, or (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with this library; if not, write to the Free Software
17  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 
19  Shogun adjustments (w) 2011 Siddharth Kherada
20 */
21 
23 #include <shogun/base/Parameter.h>
24 #include <shogun/lib/Signal.h>
26 #include <shogun/loss/HingeLoss.h>
28 
29 using namespace shogun;
30 
// Default constructor. (The signature line, original line 32, was dropped
// by the extraction — presumably "CSGDQN::CSGDQN() : CLinearMachine()";
// TODO confirm against the repository.) Sets every hyper-parameter to its
// default via init().
33 {
34  init();
35 }
36 
// Constructor taking a single cost constant. (The signature lines, original
// lines 37-38, were dropped by the extraction — presumably
// "CSGDQN::CSGDQN(float64_t C) : CLinearMachine()"; TODO confirm.)
// The same value C is used for both cost constants C1 and C2.
39 {
40  init();
41 
42  C1=C;
43  C2=C;
44 }
45 
// Construct an SGDQN machine with cost constant C (used for both C1 and C2)
// and attach the supplied training features and labels.
46 CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
// NOTE(review): original line 47 (the ctor-initializer, presumably
// ": CLinearMachine()") was dropped by the extraction — confirm against
// the repository.
48 {
49  init();
50  C1=C;
51  C2=C;
52 
53  set_features(traindat);
54  set_labels(trainlab);
55 }
56 
// Destructor. (The signature line, original line 57, was dropped by the
// extraction — presumably "CSGDQN::~CSGDQN()"; TODO confirm.)
// Releases the reference this object holds on the loss function.
58 {
59  SG_UNREF(loss);
60 }
61 
// Loss-function setter. (The signature line, original line 62, was dropped
// by the extraction — presumably
// "void CSGDQN::set_loss_function(CLossFunction* loss_func)"; TODO confirm.)
// The new object is ref'd BEFORE the old one is unref'd, so passing the
// currently-held loss back in does not destroy it.
63 {
64  SG_REF(loss_func);
65  SG_UNREF(loss);
66  loss=loss_func;
67 }
68 
69 void CSGDQN::compute_ratio(float64_t* W,float64_t* W_1,float64_t* B,float64_t* dst,int32_t dim,float64_t lambda,float64_t loss_val)
70 {
71  for (int32_t i=0; i < dim;i++)
72  {
73  float64_t diffw=W_1[i]-W[i];
74  if(diffw)
75  B[i]+=diffw/ (lambda*diffw+ loss_val*dst[i]);
76  else
77  B[i]+=1/lambda;
78  }
79 }
80 
// combine_and_clip. (The signature line, original line 81, was dropped by
// the extraction — presumably "void CSGDQN::combine_and_clip(float64_t* Bc,
// float64_t* B,int32_t dim,float64_t c1,float64_t c2,float64_t v1,
// float64_t v2)"; TODO confirm.) For every coordinate where B[i] is
// non-zero, blends B into the running scaling vector Bc as
// Bc[i] = c1*Bc[i] + c2*B[i], then clamps the result into [v1, v2].
82 {
83  for (int32_t i=0; i < dim;i++)
84  {
85  if(B[i])
86  {
87  Bc[i] = Bc[i] * c1 + B[i] * c2;
88  Bc[i]= CMath::min(CMath::max(Bc[i],v1),v2);
89  }
90  }
91 }
92 
// Training entry point. (The signature line, original line 93, was dropped
// by the extraction — presumably "bool CSGDQN::train(CFeatures* data)";
// TODO confirm.) Runs the SGD-QN quasi-Newton stochastic gradient descent
// of Bordes et al.: plain SGD steps scaled per-coordinate by Bc, with the
// weight-decay (regularization) term applied only once every `skip`
// iterations, and Bc re-estimated from observed curvature on those
// iterations. Several other numbered lines are missing from this
// extraction; they are flagged inline below.
94 {
95 
98 
 // if features were supplied explicitly, they must support dot products
99  if (data)
100  {
101  if (!data->has_property(FP_DOT))
102  SG_ERROR("Specified features are not of type CDotFeatures\n")
103  set_features((CDotFeatures*) data);
104  }
105 
107 
108  int32_t num_train_labels=m_labels->get_num_labels();
109  int32_t num_vec=features->get_num_vectors();
110 
 // one label per training vector, and a non-empty training set
111  ASSERT(num_vec==num_train_labels)
112  ASSERT(num_vec>0)
113 
 // NOTE(review): original line 114 is missing — presumably (re)allocates w
 // to the feature-space dimension before zeroing; confirm against the
 // repository.
115  w.zero();
116 
 // regularization constant of the primal objective (C1 plays the role of C)
117  float64_t lambda= 1.0/(C1*num_vec);
118 
119  // Shift t in order to have a
120  // reasonable initial learning rate.
121  // This assumes |x| \approx 1.
122  float64_t maxw = 1.0 / sqrt(lambda);
123  float64_t typw = sqrt(maxw);
124  float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
125  t = 1 / (eta0 * lambda);
126 
127  SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0)
128 
129 
 // Bc: per-coordinate step-size scaling, initialised to the flat 1/lambda
130  float64_t* Bc=SG_MALLOC(float64_t, w.vlen);
131  SGVector<float64_t>::fill_vector(Bc, w.vlen, 1/lambda);
132 
133  float64_t* result=SG_MALLOC(float64_t, w.vlen);
134  float64_t* B=SG_MALLOC(float64_t, w.vlen);
135 
136  //Calibrate
137  calibrate();
138 
139  SG_INFO("Training on %d vectors\n", num_vec)
141 
 // log-type losses have a non-zero gradient everywhere, so they take the
 // update branch on every example, not only at margin violations (z < 1)
142  ELossType loss_type = loss->get_loss_type();
143  bool is_log_loss = false;
144  if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
145  is_log_loss = true;
146 
147  for(int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
148  {
149  count = skip;
150  bool updateB=false;
151  for (int32_t i=0; i<num_vec; i++)
152  {
 // NOTE(review): original line 153 is missing — presumably fetches the
 // dense feature vector v for example i; confirm against the repository.
154  ASSERT(w.vlen==v.vlen)
155  float64_t eta = 1.0/t;
156  float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
157  float64_t z = y * features->dense_dot(i, w.vector, w.vlen);
 // curvature-estimation iteration: take a step, observe the gradient
 // change, and fold the resulting ratio into Bc via compute_ratio /
 // combine_and_clip
158  if(updateB==true)
159  {
160  if (z < 1 || is_log_loss)
161  {
163  float64_t loss_1=-loss->first_derivative(z,1);
165  SGVector<float64_t>::add(w.vector,eta*loss_1*y,result,1.0,w.vector,w.vlen);
166  float64_t z2 = y * features->dense_dot(i, w.vector, w.vlen);
167  float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
168  if(diffloss)
169  {
170  compute_ratio(w.vector,w_1.vector,B,v.vector,w.vlen,lambda,y*diffloss);
171  if(t>skip)
172  combine_and_clip(Bc,B,w.vlen,(t-skip)/(t+skip),2*skip/(t+skip),1/(100*lambda),100/lambda);
173  else
174  combine_and_clip(Bc,B,w.vlen,t/(t+skip),skip/(t+skip),1/(100*lambda),100/lambda);
175  }
176  }
177  updateB=false;
178  }
179  else
180  {
 // apply `skip` iterations' worth of weight decay in a single batch
181  if(--count<=0)
182  {
184  SGVector<float64_t>::add(w.vector,-skip*lambda*eta,result,1.0,w.vector,w.vlen);
185  count = skip;
186  updateB=true;
187  }
188 
189  if (z < 1 || is_log_loss)
190  {
192  SGVector<float64_t>::add(w.vector,eta*-loss->first_derivative(z,1)*y,result,1.0,w.vector,w.vlen);
193  }
194  }
195  t++;
196  }
197  }
 // NOTE(review): Bc is SG_MALLOC'd above (line 130) but never SG_FREE'd
 // here — only result and B are released. This looks like a memory leak;
 // confirm and free Bc alongside the others.
198  SG_FREE(result);
199  SG_FREE(B);
200 
201  return true;
202 }
203 
204 
205 
// Sparsity calibration. (The signature line, original line 206, was dropped
// by the extraction — presumably "void CSGDQN::calibrate()"; TODO confirm.)
// Estimates how sparse the training data is and derives `skip`, the number
// of iterations between batched weight-decay applications in train().
207 {
209  int32_t num_vec=features->get_num_vectors();
210  int32_t c_dim=features->get_dim_feature_space();
211 
212  ASSERT(num_vec>0)
213  ASSERT(c_dim>0)
214 
215  SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim)
216 
 // n counts the vectors visited; r accumulates their non-zero entries
217  int32_t n = 0;
218  float64_t r = 0;
219 
 // NOTE(review): the loop body (original line 221, presumably
 // "r += features->get_nnz_features_for_vector(j);") is missing from this
 // extraction — confirm against the repository.
220  for (int32_t j=0; j<num_vec ; j++, n++)
222 
223 
224  // compute weight decay skip
225  skip = (int32_t) ((16 * n * c_dim) / r);
226 }
227 
228 void CSGDQN::init()
229 {
230  t=0;
231  C1=1;
232  C2=1;
233  epochs=5;
234  skip=1000;
235  count=1000;
236 
237  loss=new CHingeLoss();
238  SG_REF(loss);
239 
240  m_parameters->add(&C1, "C1", "Cost constant 1.");
241  m_parameters->add(&C2, "C2", "Cost constant 2.");
242  m_parameters->add(&epochs, "epochs", "epochs");
243  m_parameters->add(&skip, "skip", "skip");
244  m_parameters->add(&count, "count", "count");
245 }

SHOGUN Machine Learning Toolbox - Documentation