SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SVMSGD.cpp
Go to the documentation of this file.
1 /*
2  SVM with stochastic gradient
3  Copyright (C) 2007- Leon Bottou
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License as published by the Free Software Foundation; either
8  version 2.1 of the License, or (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with this library; if not, write to the Free Software
17  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  $Id: svmsgd.cpp,v 1.13 2007/10/02 20:40:06 cvs Exp $
19 
20  Shogun adjustments (w) 2008-2009 Soeren Sonnenburg
21 */
22 
24 #include <shogun/base/Parameter.h>
25 #include <shogun/lib/Signal.h>
28 #include <shogun/loss/HingeLoss.h>
29 
30 using namespace shogun;
31 
34 {
  // NOTE(review): the signature line for this body is not present in this
  // listing (presumably the default constructor -- confirm).
  // All the body does is call init(), which assigns the member defaults,
  // creates the default hinge loss and registers the parameters.
35  init();
36 }
37 
40 {
  // NOTE(review): signature line not present in this listing (presumably a
  // constructor taking the cost C -- confirm).
  // Start from the defaults assigned by init() ...
41  init();
42 
  // ... then use the same cost value for both C1 and C2.
43  C1=C;
44  C2=C;
45 }
46 
49 {
  // NOTE(review): signature line not present in this listing (presumably a
  // constructor taking C, traindat and trainlab -- confirm).
  // Defaults first, then the same cost value for C1 and C2.
50  init();
51  C1=C;
52  C2=C;
53 
  // Attach the supplied training features and labels.
54  set_features(traindat);
55  set_labels(trainlab);
56 }
57 
59 {
  // NOTE(review): signature line not present in this listing (presumably the
  // destructor -- confirm).
  // Drops the reference held on the loss object; init() takes one with
  // SG_REF right after creating the default CHingeLoss.
60  SG_UNREF(loss);
61 }
62 
64 {
  // NOTE(review): signature line not present in this listing (presumably the
  // setter that installs loss_func as the loss -- confirm).
  // The new loss is referenced BEFORE the old one is released; presumably
  // this keeps the object alive when loss_func is the loss already held.
65  SG_REF(loss_func);
66  SG_UNREF(loss);
67  loss=loss_func;
68 }
69 
71 {
  // Training routine body: runs up to `epochs` passes of stochastic gradient
  // descent over the training vectors, updating w and bias, then returns true.
  // NOTE(review): the signature line and several interior lines are absent
  // from this listing (the embedded numbering skips 73-74, 83, 91, 112, 146
  // and 153); notes below mark where that matters.
72  // allocate memory for w and initialize everything: w and bias with 0
75 
  // Optional features passed by the caller: they must advertise the FP_DOT
  // property before being installed as the training features.
76  if (data)
77  {
78  if (!data->has_property(FP_DOT))
79  SG_ERROR("Specified features are not of type CDotFeatures\n")
80  set_features((CDotFeatures*) data);
81  }
82 
84 
  // There must be exactly one label per training vector, and at least one vector.
85  int32_t num_train_labels=m_labels->get_num_labels();
86  int32_t num_vec=features->get_num_vectors();
87 
88  ASSERT(num_vec==num_train_labels)
89  ASSERT(num_vec>0)
90 
92  w.zero();
93  bias=0;
94 
  // Regularisation constant derived from the cost C1 and the sample count.
95  float64_t lambda= 1.0/(C1*num_vec);
96 
97  // Shift t in order to have a
98  // reasonable initial learning rate.
99  // This assumes |x| \approx 1.
100  float64_t maxw = 1.0 / sqrt(lambda);
101  float64_t typw = sqrt(maxw);
102  float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
103  t = 1 / (eta0 * lambda);
104 
105  SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0)
106 
107 
108  //do the sgd
  // calibrate() sets bscale and skip from the data before the main loop.
109  calibrate();
110 
111  SG_INFO("Training on %d vectors\n", num_vec)
113 
  // Log-loss variants are updated on every example; any other loss type is
  // updated only when the margin z is below 1 (see the test inside the loop).
114  ELossType loss_type = loss->get_loss_type();
115  bool is_log_loss = false;
116  if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
117  is_log_loss = true;
118 
  // One iteration per epoch; stops early if cancellation has been signalled.
119  for(int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
120  {
  // Restart the countdown to the next periodic decay step.
121  count = skip;
122  for (int32_t i=0; i<num_vec; i++)
123  {
  // Learning rate shrinks as the step counter t grows.
124  float64_t eta = 1.0 / (lambda * t);
125  float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
  // z = y * (dense_dot(i, w) + bias); presumably dense_dot is the inner
  // product of example i with w -- confirm against the features class.
126  float64_t z = y * (features->dense_dot(i, w.vector, w.vlen) + bias);
127 
128  if (z < 1 || is_log_loss)
129  {
  // Step size along example i, from the loss derivative at z.
130  float64_t etd = -eta * loss->first_derivative(z,1);
131  features->add_to_dense_vec(etd * y / wscale, i, w.vector, w.vlen);
132 
  // Bias is only touched when use_bias is set; it is additionally shrunk
  // first when use_regularized_bias is set.
133  if (use_bias)
134  {
135  if (use_regularized_bias)
136  bias *= 1 - eta * lambda * bscale;
137  bias += etd * y * bscale;
138  }
139  }
140 
  // Every `skip` examples compute a decay factor r: the linear form
  // 1 - eta*lambda*skip is kept unless it falls below 0.8, in which case
  // the power form (1 - eta*lambda)^skip is used instead.
  // NOTE(review): r is not consumed in the visible lines; listing line 146
  // is missing here -- presumably where r is applied to w. Confirm.
141  if (--count <= 0)
142  {
143  float64_t r = 1 - eta * lambda * skip;
144  if (r < 0.8)
145  r = pow(1 - eta * lambda, skip);
147  count = skip;
148  }
149  t++;
150  }
151  }
152 
  // NOTE(review): wnorm is not declared in the visible lines; listing line
  // 153 is missing -- presumably where it is computed. Confirm.
154  SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias)
155 
156  return true;
157 }
158 
160 {
  // Calibration body: scans training vectors to derive bscale (bias update
  // scaling) and skip (examples between decay steps), then logs both.
  // NOTE(review): the signature line and listing lines 161 and 180 are absent
  // from this listing.
162  int32_t num_vec=features->get_num_vectors();
163  int32_t c_dim=features->get_dim_feature_space();
164 
165  ASSERT(num_vec>0)
166  ASSERT(c_dim>0)
167 
  // Zero-filled scratch accumulator with one slot per feature dimension;
  // released with SG_FREE at the end of the body.
168  float64_t* c=SG_MALLOC(float64_t, c_dim);
169  memset(c, 0, c_dim*sizeof(float64_t));
170 
171  SG_INFO("Estimating sparsity and bscale num_vec=%d num_feat=%d.\n", num_vec, c_dim)
172 
173  // compute average gradient size
  // n counts the vectors visited, m tracks the largest entry of c.
  // NOTE(review): r stays 0 in the visible lines yet is used as a divisor
  // below; listing line 180 is missing inside the loop -- presumably where r
  // is accumulated. Confirm against the full source.
174  int32_t n = 0;
175  float64_t m = 0;
176  float64_t r = 0;
177 
  // Visit vectors until all are seen or the largest accumulated entry exceeds 1000.
178  for (int32_t j=0; j<num_vec && m<=1000; j++, n++)
179  {
  // Accumulate vector j into c with weight 1; the trailing `true` presumably
  // requests absolute values -- confirm against the features class.
181  features->add_to_dense_vec(1, j, c, c_dim, true);
182 
183  //waste cpu cycles for readability
184  //(only changed dims need checking)
185  m=CMath::max(c, c_dim);
186  }
187 
188  // bias update scaling
189  bscale = 0.5*m/n;
190 
191  // compute weight decay skip
  // NOTE(review): 16 * n * c_dim is evaluated in integer arithmetic before
  // the division by r, so it can overflow when n * c_dim is large.
192  skip = (int32_t) ((16 * n * c_dim) / r);
193  SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale)
194 
195  SG_FREE(c);
196 }
197 
198 void CSVMSGD::init()
199 {
200  t=1;
201  C1=1;
202  C2=1;
203  wscale=1;
204  bscale=1;
205  epochs=5;
206  skip=1000;
207  count=1000;
208  use_bias=true;
209 
210  use_regularized_bias=false;
211 
212  loss=new CHingeLoss();
213  SG_REF(loss);
214 
215  m_parameters->add(&C1, "C1", "Cost constant 1.");
216  m_parameters->add(&C2, "C2", "Cost constant 2.");
217  m_parameters->add(&wscale, "wscale", "W scale");
218  m_parameters->add(&bscale, "bscale", "b scale");
219  m_parameters->add(&epochs, "epochs", "epochs");
220  m_parameters->add(&skip, "skip", "skip");
221  m_parameters->add(&count, "count", "count");
222  m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
223  m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
224 }

SHOGUN Machine Learning Toolbox - Documentation