SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
sfa.h
Go to the documentation of this file.
1 /* This program is free software: you can redistribute it and/or modify
2  * it under the terms of the GNU General Public License as published by
3  * the Free Software Foundation, either version 3 of the License, or
4  * (at your option) any later version.
5  *
6  * This program is distributed in the hope that it will be useful,
7  * but WITHOUT ANY WARRANTY; without even the implied warranty of
8  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9  * GNU General Public License for more details.
10  *
11  * You should have received a copy of the GNU General Public License
12  * along with this program. If not, see <http://www.gnu.org/licenses/>.
13  *
14  * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye
15  */
16 
17 
18 #ifndef SFA_SLEP
19 #define SFA_SLEP
20 
21 #include <shogun/lib/config.h>
22 #ifdef USE_GPL_SHOGUN
23 
24 /*
25  Revision History
26 
27  First Version available on October 10, 2009
28 
29  A runnable version on October 15, 2009
30 
31  Major revision on October 29, 2009
32  (Some functions appearing in a previous version have deleted, please refer to the previous version for the old functions.
33  Some new functions have been added as well)
34 
35 */
36 
37 /*
38 
39  Files contained in this header file sfa.h:
40 
41  1. Algorithms for solving the linear system A A^T z0 = Av (see the description of A from the following context)
42 
43  void Thomas(double *zMax, double *z0,
44  double * Av, int nn)
45 
46  void Rose(double *zMax, double *z0,
47  double * Av, int nn)
48 
49  int supportSet(double *x, double *v, double *z,
50  double *g, int * S, double lambda, int nn)
51 
52  void dualityGap(double *gap, double *z,
53  double *g, double *s, double *Av,
54  double lambda, int nn)
55 
56  void dualityGap2(double *gap, double *z,
57  double *g, double *s, double *Av,
58  double lambda, int nn)
59 
60 
61  2. The Subgraident Finding Algorithm (SFA) for solving problem (4) (refer to the description of the problem for detail)
62 
63  int sfa(double *x, double *gap,
64  double *z, double *z0, double * v, double * Av,
65  double lambda, int nn, int maxStep,
66  double *s, double *g,
67  double tol, int tau, int flag)
68 
69  int sfa_special(double *x, double *gap,
70  double *z, double * v, double * Av,
71  double lambda, int nn, int maxStep,
72  double *s, double *g,
73  double tol, int tau)
74 
75  int sfa_one(double *x, double *gap,
76  double *z, double * v, double * Av,
77  double lambda, int nn, int maxStep,
78  double *s, double *g,
79  double tol, int tau)
80 
81 
82 */
83 
84 
85 /*
86 
87  Some mathematical background.
88 
89  In this file, we discuss how to solve the following subproblem,
90 
91  min_x 1/2 \|x-v\|^2 + lambda \|A x\|_1, (1)
92 
93  which is a key problem used in the Fused Lasso Signal Approximator (FLSA).
94 
95  Also, note that, FLSA is a building block for solving the optimation problmes with fused Lasso penalty.
96 
97  In (1), x and v are n-dimensional vectors,
98  and A is a matrix with size (n-1) x n, and is defined as follows (e.g., n=4):
99  A= [ -1 1 0 0;
100  0 -1 1 0;
101  0 0 -1 1]
102 
103  The above problem can be reformulated as the following equivalent min-max optimization problem
104 
105  min_x max_z 1/2 \|x-v\|^2 + <A x, z>
106  subject to \|z\|_{infty} \leq lambda (2)
107 
108 
109  It is easy to get that, at the optimal point
110 
111  x = v - AT z, (3)
112 
113  where z is the optimal solution to the following optimization problem
114 
115  min_z 1/2 z^T A AT z - < z, A v>,
116  subject to \|z\|_{infty} \leq lambda (4)
117 
118 
119 
120  Let B=A A^T. It is easy to get that B is a (n-1) x (n-1) tridiagonal matrix.
121  When n=5, B is defined as:
122  B= [ 2 -1 0 0;
123  -1 2 -1 0;
124  0 -1 2 -1;
125  0 0 -1 2]
126 
127  Let z0 be the solution to the linear system:
128 
129  A A^T * z0 = A * v (5)
130 
131  The problem (5) can be solve by the Thomas Algorithm, in about 5n multiplications and 4n additions.
132 
133  It can also be solved by the Rose's Algorithm, in about 2n multiplications and 2n additions.
134 
135  Moreover, considering the special structure of the matrix A (and B),
136  it can be solved in about n multiplications and 3n additions
137 
138  If lambda \geq \|z0\|_{infty}, x_i= mean(v), for all i,
139  the problem (1) admits near analytical solution
140 
141 
142  We have also added the restart technique, please refer to our paper for detail!
143 
144 */
145 
146 
147 void Thomas(double *zMax, double *z0, double * Av, int nn);
148 
149 void Rose(double *zMax, double *z0, double * Av, int nn);
150 
151 /*
153 
154 x=omega(z)
155 
156 v: the vector to be projected
157 z: the approximate solution
158 g: the gradient at z (g should be computed before calling this function
159 
160 nn: the length of z, g, and S (maximal length for S)
161 
162 n: the length of x and v
163 
164 S: records the indices of the elements in the support set
165 */
166 int supportSet(double *x, double *v, double *z, double *g, int * S, double lambda, int nn);
167 
168 /*
170 
171 we compute the duality corresponding the solution z
172 
173 z: the approximate solution
174 g: the gradient at z (we recompute the gradient)
175 s: an auxiliary variable
176 Av: A*v
177 
178 nn: the lenght for z, g, s, and Av
179 
180 The variables g and s shall be revised.
181 
182 The variables z and Av remain unchanged.
183 */
184 void dualityGap(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn);
185 
186 /*
187  Similar to dualityGap,
188 
189  The difference is that, we assume that g has been computed.
190  */
191 void dualityGap2(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn);
192 
193 /*
194 generateSolution:
195 
196 generate the solution x based on the information of z and g
197 (!!!!we assume that g has been computed as the gradient of z!!!!)
198 
199 */
200 int generateSolution(double *x, double *z, double *gap,
201  double *v, double *Av,
202  double *g, double *s, int *S,
203  double lambda, int nn);
204 
205 void restartMapping(double *g, double *z, double * v,
206  double lambda, int nn);
207 
208 /*
210 
211 Our objective is to solve the fused Lasso signal approximator (flsa) problem:
212 
213 min_x g(x) 1/2 \|x-v\|^2 + lambda \|A x\|_1, (1)
214 
215 Let x* be the solution (which is unique), it satisfies
216 
217 0 in x* - v + A^T * lambda *SGN(Ax*) (2)
218 
219 To solve x*, it suffices to find
220 
221 y* in A^T * lambda *SGN(Ax*) (3)
222 that satisfies
223 
224 x* - v + y* =0 (4)
225 which leads to
226 x*= v - y* (5)
227 
228 Due to the uniqueness of x*, we conclude that y* is unique.
229 
230 As y* is a subgradient of lambda \|A x*\|_1,
231 we name our method as Subgradient Finding Algorithm (sfa).
232 
233 y* in (3) can be further written as
234 
235 y*= A^T * z* (6)
236 where
237 
238 z* in lambda* SGN (Ax*) (7)
239 
240 From (6), we have
241 z* = (A A^T)^{-1} A * y* (8)
242 
243 Therefore, from the uqniueness of y*, we conclude that z* is also unique.
244 Next, we discuss how to solve this unique z*.
245 
246 The problem (1) can reformulated as the following equivalent problem:
247 
248 min_x max_z f(x, z)= 1/2 \|x-v\|^2 + <A x, z>
249 subject to \|z\|_{infty} \leq lambda (9)
250 
251 At the saddle point, we have
252 
253 x = v - AT z, (10)
254 
255 which somehow concides with (5) and (6)
256 
257 Plugging (10) into (9), we obtain the problem
258 
259 min_z 1/2 z^T A AT z - < z, A v>,
260 subject to \|z\|_{infty} \leq lambda, (11)
261 
262 In this program, we apply the Nesterov's method for solving (11).
263 
264 
265 Duality gap:
266 
267 At a given point z0, we compute x0= v - A^T z0.
268 It is easy to show that
269 min_x f(x, z0) = f(x0, z0) <= max_z f(x0, z) (12)
270 
271 Moreover, we have
272 max_z f(x0, z) - min_x f(x, z0)
273 <= lambda * \|A x0\|_1 - < z0, Av - A A^T z0> (13)
274 
275 It is also to get that
276 
277 f(x0, z0) <= f(x*, z*) <= max_z f(x0, z) (14)
278 
279 g(x*)=f(x*, z*) (15)
280 
281 g(x0)=max_z f(x0, z) (17)
282 
283  Therefore, we have
284 
285 g(x0)-g(x*) <= lambda * \|A x0\|_1 - < z0, Av - A A^T z0> (18)
286 
287 
288  We have applied a restarting technique, which is quite involved; and thus, we do not explain here.
289 
291  */
292 
293 
294  /*
296 
297  For sfa, the stepsize of the Nesterov's method is fixed to 1/4, so that no line search is needed.
298 
299 
300 
301  Explanation of the parameters:
302 
303  Output parameters
304  x: the solution to the primal problem
305  gap: the duality gap (pointer)
306 
307  Input parameters
308  z: the solution to the dual problem (before calling this function, z contains a starting point)
309  !!!!we assume that the starting point has been successfully initialized in z !!!!
310  z0: a variable used for multiple purposes:
311  1) the previous solution z0
312  2) the difference between z and z0, i.e., z0=z- z0
313 
314  lambda: the regularization parameter (and the radius of the infity ball, see (11)).
315  nn: the length of z, z0, Av, g, and s
316  maxStep: the maximal number of iterations
317 
318  v: the point to be projected (not changed after the program)
319  Av: A*v (not changed after the program)
320 
321  s: the search point (used for multiple purposes)
322  g: the gradient at g (and it is also used for multiple purposes)
323 
324  tol: the tolerance of the gap
325  tau: the duality gap or the restarting technique is done every tau steps
326  flag: if flag=1, we apply the resart technique
327  flag=2, just run the SFA algorithm, terminate it when the absolution change is less than tol
328  flag=3, just run the SFA algorithm, terminate it when the duality gap is less than tol
329  flag=4, just run the SFA algorithm, terminate it when the relative duality gap is less than tol
330 
331 
332  We would like to emphasis that the following assumptions
333  have been checked in the functions that call this function:
334  1) 0< lambda < z_max
335  2) nn >=2
336  3) z has been initialized with a starting point
337  4) z0 has been initialized with all zeros
338 
339  The termination condition is checked every tau iterations.
340 
341  For the duality gap, please refer to (12-18)
342  */
343 int sfa(double *x, double *gap, int * activeS,
344  double *z, double *z0, double * v, double * Av,
345  double lambda, int nn, int maxStep,
346  double *s, double *g,
347  double tol, int tau, int flag);
348 
349 /*
350 
351  Refer to sfa for the defintions of the variables
352 
353  In this file, we restart the program every step, and neglect the gradient step.
354 
355  It seems that, this program does not converge.
356 
357  This function shows that the gradient step is necessary.
358  */
359 int sfa_special(double *x, double *gap, int * activeS,
360  double *z, double * v, double * Av,
361  double lambda, int nn, int maxStep,
362  double *s, double *g,
363  double tol, int tau);
364 
365 /*
366  We do one gradient descent, and then restart the program
367  */
368 int sfa_one(double *x, double *gap, int * activeS,
369  double *z, double * v, double * Av,
370  double lambda, int nn, int maxStep,
371  double *s, double *g,
372  double tol, int tau);
373 #endif //USE_GPL_SHOGUN
374 #endif /* ----- #ifndef SFA_SLEP ----- */
375 

SHOGUN Machine Learning Toolbox - Documentation