SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MKLMulticlassGradient.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2009 Alexander Binder
8  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  *
10  * Update to patch 0.10.0 - thanks to Eric aka Yoo (thereisnoknife@gmail.com)
11  *
12  */
13 
16 #include <vector>
17 #include <cmath>
18 #include <cassert>
19 
20 using namespace shogun;
21 
23 {
24  numkernels = 0;
25  pnorm=2;
26 
27 }
29 {
30 
31 }
32 
34 {
36  pnorm=gl.pnorm;
37  return (*this);
38 
39 }
41 {
43  pnorm=gl.pnorm;
44 
45 }
46 
47 void MKLMulticlassGradient::setup(const int32_t numkernels2)
48 {
49  numkernels=numkernels2;
50  if (numkernels<=1)
51  {
52  SG_ERROR("void glpkwrapper::setup(const int32_tnumkernels): input "
53  "numkernels out of bounds: %d\n",numkernels);
54  }
55 
56 }
57 
59 {
60  pnorm=norm;
61  if(pnorm<1 )
62  SG_ERROR("MKLMulticlassGradient::set_mkl_norm(float64_t norm) : parameter pnorm<1")
63 }
64 
65 
66 void MKLMulticlassGradient::addconstraint(const ::std::vector<float64_t> & normw2,
67  const float64_t sumofpositivealphas)
68 {
69  normsofsubkernels.push_back(normw2);
70  sumsofalphas.push_back(sumofpositivealphas);
71 }
72 
73 void MKLMulticlassGradient::genbetas( ::std::vector<float64_t> & weights ,const ::std::vector<float64_t> & gammas)
74 {
75 
76  assert((int32_t)gammas.size()+1==numkernels);
77 
78  double pi4=3.151265358979238/2;
79 
80  weights.resize(numkernels);
81 
82 
83  // numkernels-dimensional polar transform
84  weights[0]=1;
85 
86  for(int32_t i=0; i< numkernels-1 ;++i)
87  {
88  for(int32_t k=0; k< i+1 ;++k)
89  {
90  weights[k]*=cos( std::min(std::max(0.0,gammas[i]),pi4) );
91  }
92  weights[i+1]=sin( std::min(std::max(0.0,gammas[i]),pi4) );
93  }
94 
95  // pnorm- manifold adjustment
96  if(pnorm!=2.0)
97  {
98  for(int32_t i=0; i< numkernels ;++i)
99  weights[i]=pow(weights[i],2.0/pnorm);
100  }
101 }
102 
103 void MKLMulticlassGradient::gengammagradient( ::std::vector<float64_t> & gammagradient ,const ::std::vector<float64_t> & gammas,const int32_t dim)
104 {
105 
106  assert((int32_t)gammas.size()+1==numkernels);
107 
108  double pi4=3.151265358979238/2;
109 
110  gammagradient.resize(numkernels);
111  std::fill(gammagradient.begin(),gammagradient.end(),0.0);
112 
113  // numkernels-dimensional polar transform
114  gammagradient[0]=1;
115 
116  for(int32_t i=0; i< numkernels-1 ;++i)
117  {
118  if(i!=dim)
119  {
120  for(int32_t k=0; k< std::min(i+1,dim+2) ;++k)
121  {
122  gammagradient[k]*=pow( cos( std::min(std::max(0.0,gammas[i]),pi4) ), 2.0/pnorm) ;
123  }
124  if(i<dim)
125  gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pi4) ),2.0/pnorm);
126  }
127  else if(i==dim)
128  {
129  // i==dim, higher dims are 0
130  for(int32_t k=0; k< i+1 ;++k)
131  {
132  gammagradient[k]*= pow( cos( std::min(std::max(0.0,gammas[i]),pi4) ), 2.0/pnorm-1.0)*(-1)*sin( std::min(std::max(0.0,gammas[i]),pi4) );
133  }
134  gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pi4) ),2.0/pnorm-1)*cos( std::min(std::max(0.0,gammas[i]),pi4) );
135  }
136  }
137 }
138 
139 float64_t MKLMulticlassGradient::objectives(const ::std::vector<float64_t> & weights, const int32_t index)
140 {
141  assert(index>=0);
142  assert(index < (int32_t) sumsofalphas.size());
143  assert(index < (int32_t) normsofsubkernels.size());
144 
145 
146  float64_t obj= -sumsofalphas[index];
147  for(int32_t i=0; i< numkernels ;++i)
148  {
149  obj+=0.5*normsofsubkernels[index][i]*weights[i];
150  }
151  return(obj);
152 }
153 
154 
155 void MKLMulticlassGradient::linesearch(std::vector<float64_t> & finalbeta,const std::vector<float64_t> & oldweights)
156 {
157 
158  float64_t pi4=3.151265358979238/2;
159 
160  float64_t fingrad=1e-7;
161  int32_t maxhalfiter=20;
162  int32_t totaliters=6;
163  float64_t maxrelobjdiff=1e-6;
164 
165  std::vector<float64_t> finalgamma,curgamma;
166 
167  curgamma.resize(numkernels-1);
168  if(oldweights.empty())
169  {
170  std::fill(curgamma.begin(),curgamma.end(),pi4/2);
171  }
172  else
173  {
174  // curgamma from init: arcsin on highest dim ^p/2 !!! and divided everthing by its cos
175  std::vector<float64_t> tmpbeta(numkernels);
176  for(int32_t i=numkernels-1; i>= 0 ;--i)
177  {
178  tmpbeta[i]=pow(oldweights[i],pnorm/2);
179  }
180 
181  for(int32_t i=numkernels-1; i>= 1 ;--i)
182  {
183  curgamma[i-1]=asin(tmpbeta[i]);
184 
185  if(i<numkernels-1)
186  {
187  if( cos(curgamma[i])<=0)
188  {
189  SG_SINFO("linesearch(...): at i %d cos(curgamma[i-1])<=0 %f\n",i, cos(curgamma[i-1]))
190  //curgamma[i-1]=pi4/2;
191  }
192  }
193 
194  for(int32_t k=numkernels-2; k>= 1 ;--k) // k==0 not necessary
195  {
196  if(cos(curgamma[i-1])>0)
197  {
198  tmpbeta[k]/=cos(curgamma[i-1]);
199  if(tmpbeta[k]>1)
200  {
201  SG_SINFO("linesearch(...): at k %d tmpbeta[k]>1 %f\n",k, tmpbeta[k])
202  }
203  tmpbeta[k]=std::min(1.0,std::max(0.0, tmpbeta[k]));
204  }
205  }
206  }
207  }
208 
209  for(size_t i=0;i<curgamma.size();++i)
210  {
211  SG_SINFO("linesearch(...): curgamma[i] %f\n",curgamma[i])
212  }
213 
214 
215  bool finished=false;
216  int32_t longiters=0;
217  while(!finished)
218  {
219  ++longiters;
220  std::vector<float64_t> curbeta;
221  genbetas( curbeta ,curgamma);
222  //find smallest objective
223  int32_t minind=0;
224  float64_t minval=objectives(curbeta, minind);
225  SG_SINFO("linesearch(...): objectives at i %f\n",minval)
226  for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
227  {
228  float64_t tmpval=objectives(curbeta, i);
229  SG_SINFO("linesearch(...): objectives at i %f\n",tmpval)
230  if(tmpval<minval)
231  {
232  minval=tmpval;
233  minind=i;
234  }
235  }
236  float64_t lobj=minval;
237  //compute gradient for smallest objective
238  std::vector<float64_t> curgrad;
239  for(int32_t i=0; i< numkernels-1 ;++i)
240  {
241  ::std::vector<float64_t> gammagradient;
242  gengammagradient( gammagradient ,curgamma,i);
243  curgrad.push_back(objectives(gammagradient, minind));
244  }
245  //find boundary hit point (check for each dim) to [0, pi/4]
246  std::vector<float64_t> maxalphas(numkernels-1,0);
247  float64_t maxgrad=0;
248  for(int32_t i=0; i< numkernels-1 ;++i)
249  {
250  maxgrad=std::max(maxgrad,fabs(curgrad[i]) );
251  if(curgrad[i]<0)
252  {
253  maxalphas[i]=(0-curgamma[i])/curgrad[i];
254  }
255  else if(curgrad[i]>0)
256  {
257  maxalphas[i]=(pi4-curgamma[i])/curgrad[i];
258  }
259  else
260  {
261  maxalphas[i]=1024*1024;
262  }
263  }
264 
265  float64_t maxalpha=maxalphas[0];
266  for(int32_t i=1; i< numkernels-1 ;++i)
267  {
268  maxalpha=std::min(maxalpha,maxalphas[i]);
269  }
270 
271  if((maxalpha>1024*1023)|| (maxgrad<fingrad))
272  {
273  //curgrad[i] approx 0 for all i terminate
274  finished=true;
275  finalgamma=curgamma;
276  }
277  else // of if(maxalpha>1024*1023)
278  {
279  //linesearch: shrink until min of all objective increases compared to starting point, then check left half and right halve until finish
280  // curgamma + al*curgrad ,aximizes al in [0, maxal]
281  float64_t leftalpha=0, rightalpha=maxalpha, midalpha=(leftalpha+rightalpha)/2;
282 
283  std::vector<float64_t> tmpgamma=curgamma, tmpbeta;
284  for(int32_t i=1; i< numkernels-1 ;++i)
285  {
286  tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
287  }
288  genbetas( tmpbeta ,tmpgamma);
289  float64_t curobj=objectives(tmpbeta, 0);
290  for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
291  {
292  curobj=std::min(curobj,objectives(tmpbeta, i));
293  }
294 
295  int curhalfiter=0;
296  while((curobj < minval)&&(curhalfiter<maxhalfiter)&&(fabs(curobj/minval-1 ) > maxrelobjdiff ))
297  {
298  rightalpha=midalpha;
299  midalpha=(leftalpha+rightalpha)/2;
300  ++curhalfiter;
301  tmpgamma=curgamma;
302  for(int32_t i=1; i< numkernels-1 ;++i)
303  {
304  tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
305  }
306  genbetas( tmpbeta ,tmpgamma);
307  curobj=objectives(tmpbeta, 0);
308  for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
309  {
310  curobj=std::min(curobj,objectives(tmpbeta, i));
311  }
312  }
313 
314  float64_t robj=curobj;
315  float64_t tmpobj=std::max(lobj,robj);
316  do
317  {
318 
319  tmpobj=std::max(lobj,robj);
320 
321  tmpgamma=curgamma;
322  for(int32_t i=1; i< numkernels-1 ;++i)
323  {
324  tmpgamma[i]=tmpgamma[i]+midalpha*curgrad[i];
325  }
326  genbetas( tmpbeta ,tmpgamma);
327  curobj=objectives(tmpbeta, 0);
328  for(int32_t i=1; i< (int32_t)sumsofalphas.size() ;++i)
329  {
330  curobj=std::min(curobj,objectives(tmpbeta, i));
331  }
332 
333  if(lobj>robj)
334  {
335  rightalpha=midalpha;
336  robj=curobj;
337  }
338  else
339  {
340  leftalpha=midalpha;
341  lobj=curobj;
342  }
343  midalpha=(leftalpha+rightalpha)/2;
344 
345  }
346  while( fabs(curobj/tmpobj-1 ) > maxrelobjdiff );
347  finalgamma=tmpgamma;
348  curgamma=tmpgamma;
349  } // else // of if(maxalpha>1024*1023)
350 
351  if(longiters>= totaliters)
352  {
353  finished=true;
354  }
355  }
356  genbetas(finalbeta,finalgamma);
357  float64_t nor=0;
358  for(int32_t i=0; i< numkernels ;++i)
359  {
360  nor+=pow(finalbeta[i],pnorm);
361  }
362  if(nor>0)
363  {
364  nor=pow(nor,1.0/pnorm);
365  for(int32_t i=0; i< numkernels ;++i)
366  {
367  finalbeta[i]/=nor;
368  }
369  }
370 }
371 
372 
373 void MKLMulticlassGradient::linesearch2(std::vector<float64_t> & finalbeta,const std::vector<float64_t> & oldweights)
374 {
375 
376 const float64_t epsRegul = 0.01;
377 
378 int32_t num_kernels=(int)oldweights.size();
379 int32_t nofKernelsGood=num_kernels;
380 
381 finalbeta=oldweights;
382 
383  for( int32_t p=0; p<num_kernels; ++p )
384  {
385  //SG_PRINT("MKL-direct: sumw[%3d] = %e ( oldbeta = %e )\n", p, sumw[p], old_beta[p] )
386  if( oldweights[p] >= 0.0 )
387  {
388  finalbeta[p] = normsofsubkernels.back()[p] * oldweights[p]*oldweights[p] / pnorm;
389  finalbeta[p] = CMath::pow( finalbeta[p], 1.0 / (pnorm+1.0) );
390  }
391  else
392  {
393  finalbeta[p] = 0.0;
394  --nofKernelsGood;
395  }
396  ASSERT( finalbeta[p] >= 0 )
397  }
398 
399  // --- normalize
400  float64_t Z = 0.0;
401  for( int32_t p=0; p<num_kernels; ++p )
402  Z += CMath::pow( finalbeta[p], pnorm );
403 
404  Z = CMath::pow( Z, -1.0/pnorm );
405  ASSERT( Z >= 0 )
406  for( int32_t p=0; p<num_kernels; ++p )
407  finalbeta[p] *= Z;
408 
409  // --- regularize & renormalize
410  float64_t preR = 0.0;
411  for( int32_t p=0; p<num_kernels; ++p )
412  preR += CMath::pow( oldweights[p] - finalbeta[p], 2.0 );
413 
414  const float64_t R = CMath::sqrt( preR / pnorm ) * epsRegul;
415  if( !( R >= 0 ) )
416  {
417  SG_PRINT("MKL-direct: p = %.3f\n", pnorm )
418  SG_PRINT("MKL-direct: nofKernelsGood = %d\n", nofKernelsGood )
419  SG_PRINT("MKL-direct: Z = %e\n", Z )
420  SG_PRINT("MKL-direct: eps = %e\n", epsRegul )
421  for( int32_t p=0; p<num_kernels; ++p )
422  {
423  const float64_t t = CMath::pow( oldweights[p] - finalbeta[p], 2.0 );
424  SG_PRINT("MKL-direct: t[%3d] = %e ( diff = %e = %e - %e )\n", p, t, oldweights[p]-finalbeta[p], oldweights[p], finalbeta[p] )
425  }
426  SG_PRINT("MKL-direct: preR = %e\n", preR )
427  SG_PRINT("MKL-direct: preR/p = %e\n", preR/pnorm )
428  SG_PRINT("MKL-direct: sqrt(preR/p) = %e\n", CMath::sqrt(preR/pnorm) )
429  SG_PRINT("MKL-direct: R = %e\n", R )
430  SG_ERROR("Assertion R >= 0 failed!\n" )
431  }
432 
433  Z = 0.0;
434  for( int32_t p=0; p<num_kernels; ++p )
435  {
436  finalbeta[p] += R;
437  Z += CMath::pow( finalbeta[p], pnorm );
438  ASSERT( finalbeta[p] >= 0 )
439  }
440  Z = CMath::pow( Z, -1.0/pnorm );
441  ASSERT( Z >= 0 )
442  for( int32_t p=0; p<num_kernels; ++p )
443  {
444  finalbeta[p] *= Z;
445  ASSERT( finalbeta[p] >= 0.0 )
446  if( finalbeta[p] > 1.0 )
447  finalbeta[p] = 1.0;
448  }
449 }
450 
451 void MKLMulticlassGradient::computeweights(std::vector<float64_t> & weights2)
452 {
453  if(pnorm<1 )
454  SG_ERROR("MKLMulticlassGradient::computeweights(std::vector<float64_t> & weights2) : parameter pnorm<1")
455 
456  SG_SDEBUG("MKLMulticlassGradient::computeweights(...): pnorm %f\n",pnorm)
457 
458  std::vector<float64_t> initw(weights2);
459  linesearch2(weights2,initw);
460 
461  SG_SINFO("MKLMulticlassGradient::computeweights(...): newweights \n")
462  for(size_t i=0;i<weights2.size();++i)
463  {
464  SG_SINFO(" %f",weights2[i])
465  }
466  SG_SINFO(" \n")
467 
468  /*
469  int maxnumlinesrch=15;
470  float64_t maxdiff=1e-6;
471 
472  bool finished =false;
473  int numiter=0;
474  do
475  {
476  ++numiter;
477  std::vector<float64_t> initw(weights2);
478  linesearch(weights2,initw);
479  float64_t norm=0;
480  if(!initw.empty())
481  {
482  for(size_t i=0;i<weights2.size();++i)
483  {
484  norm+=(weights2[i]-initw[i])*(weights2[i]-initw[i]);
485  }
486  norm=sqrt(norm);
487  }
488  else
489  {
490  norm=maxdiff+1;
491  }
492 
493  if((norm < maxdiff) ||(numiter>=maxnumlinesrch ))
494  {
495  finished=true;
496  }
497  // for(size_t i=0;i<weights2.size();++i)
498  // {
499  // SG_SINFO("MKLMulticlassGradient::computeweights(...): oldweights %f\n",initw[i])
500  // }
501  SG_SINFO("MKLMulticlassGradient::computeweights(...): newweights at iter %d normdiff %f\n",numiter,norm)
502  for(size_t i=0;i<weights2.size();++i)
503  {
504  SG_SINFO(" %f",weights2[i])
505  }
506  SG_SINFO(" \n")
507  }
508  while(false==finished);
509  */
510 
511 }

SHOGUN Machine Learning Toolbox - Documentation