SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sfa.h
Go to the documentation of this file.
1 /* This program is free software: you can redistribute it and/or modify
2  * it under the terms of the GNU General Public License as published by
3  * the Free Software Foundation, either version 3 of the License, or
4  * (at your option) any later version.
5  *
6  * This program is distributed in the hope that it will be useful,
7  * but WITHOUT ANY WARRANTY; without even the implied warranty of
8  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9  * GNU General Public License for more details.
10  *
11  * You should have received a copy of the GNU General Public License
12  * along with this program. If not, see <http://www.gnu.org/licenses/>.
13  *
14  * Copyright (C) 2009 - 2012 Jun Liu and Jieping Ye
15  */
16 
17 #ifndef SFA_SLEP
18 #define SFA_SLEP
19 
20 #include <shogun/lib/config.h>
21 
22 /*
23  Revision History
24 
25  First Version available on October 10, 2009
26 
27  A runnable version on October 15, 2009
28 
29  Major revision on October 29, 2009
30  (Some functions appearing in a previous version have deleted, please refer to the previous version for the old functions.
31  Some new functions have been added as well)
32 
33 */
34 
35 /*
36 
37  Files contained in this header file sfa.h:
38 
39  1. Algorithms for solving the linear system A A^T z0 = Av (see the description of A from the following context)
40 
41  void Thomas(double *zMax, double *z0,
42  double * Av, int nn)
43 
44  void Rose(double *zMax, double *z0,
45  double * Av, int nn)
46 
47  int supportSet(double *x, double *v, double *z,
48  double *g, int * S, double lambda, int nn)
49 
50  void dualityGap(double *gap, double *z,
51  double *g, double *s, double *Av,
52  double lambda, int nn)
53 
54  void dualityGap2(double *gap, double *z,
55  double *g, double *s, double *Av,
56  double lambda, int nn)
57 
58 
59  2. The Subgraident Finding Algorithm (SFA) for solving problem (4) (refer to the description of the problem for detail)
60 
61  int sfa(double *x, double *gap,
62  double *z, double *z0, double * v, double * Av,
63  double lambda, int nn, int maxStep,
64  double *s, double *g,
65  double tol, int tau, int flag)
66 
67  int sfa_special(double *x, double *gap,
68  double *z, double * v, double * Av,
69  double lambda, int nn, int maxStep,
70  double *s, double *g,
71  double tol, int tau)
72 
73  int sfa_one(double *x, double *gap,
74  double *z, double * v, double * Av,
75  double lambda, int nn, int maxStep,
76  double *s, double *g,
77  double tol, int tau)
78 
79 
80 */
81 
82 
83 /*
84 
85  Some mathematical background.
86 
87  In this file, we discuss how to solve the following subproblem,
88 
89  min_x 1/2 \|x-v\|^2 + lambda \|A x\|_1, (1)
90 
91  which is a key problem used in the Fused Lasso Signal Approximator (FLSA).
92 
93  Also, note that, FLSA is a building block for solving the optimation problmes with fused Lasso penalty.
94 
95  In (1), x and v are n-dimensional vectors,
96  and A is a matrix with size (n-1) x n, and is defined as follows (e.g., n=4):
97  A= [ -1 1 0 0;
98  0 -1 1 0;
99  0 0 -1 1]
100 
101  The above problem can be reformulated as the following equivalent min-max optimization problem
102 
103  min_x max_z 1/2 \|x-v\|^2 + <A x, z>
104  subject to \|z\|_{infty} \leq lambda (2)
105 
106 
107  It is easy to get that, at the optimal point
108 
109  x = v - AT z, (3)
110 
111  where z is the optimal solution to the following optimization problem
112 
113  min_z 1/2 z^T A AT z - < z, A v>,
114  subject to \|z\|_{infty} \leq lambda (4)
115 
116 
117 
118  Let B=A A^T. It is easy to get that B is a (n-1) x (n-1) tridiagonal matrix.
119  When n=5, B is defined as:
120  B= [ 2 -1 0 0;
121  -1 2 -1 0;
122  0 -1 2 -1;
123  0 0 -1 2]
124 
125  Let z0 be the solution to the linear system:
126 
127  A A^T * z0 = A * v (5)
128 
129  The problem (5) can be solve by the Thomas Algorithm, in about 5n multiplications and 4n additions.
130 
131  It can also be solved by the Rose's Algorithm, in about 2n multiplications and 2n additions.
132 
133  Moreover, considering the special structure of the matrix A (and B),
134  it can be solved in about n multiplications and 3n additions
135 
136  If lambda \geq \|z0\|_{infty}, x_i= mean(v), for all i,
137  the problem (1) admits near analytical solution
138 
139 
140  We have also added the restart technique, please refer to our paper for detail!
141 
142 */
143 
144 
145 void Thomas(double *zMax, double *z0, double * Av, int nn);
146 
147 void Rose(double *zMax, double *z0, double * Av, int nn);
148 
149 /*
151 
152 x=omega(z)
153 
154 v: the vector to be projected
155 z: the approximate solution
156 g: the gradient at z (g should be computed before calling this function
157 
158 nn: the length of z, g, and S (maximal length for S)
159 
160 n: the length of x and v
161 
162 S: records the indices of the elements in the support set
163 */
164 int supportSet(double *x, double *v, double *z, double *g, int * S, double lambda, int nn);
165 
166 /*
168 
169 we compute the duality corresponding the solution z
170 
171 z: the approximate solution
172 g: the gradient at z (we recompute the gradient)
173 s: an auxiliary variable
174 Av: A*v
175 
176 nn: the lenght for z, g, s, and Av
177 
178 The variables g and s shall be revised.
179 
180 The variables z and Av remain unchanged.
181 */
182 void dualityGap(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn);
183 
184 /*
185  Similar to dualityGap,
186 
187  The difference is that, we assume that g has been computed.
188  */
189 void dualityGap2(double *gap, double *z, double *g, double *s, double *Av, double lambda, int nn);
190 
191 /*
192 generateSolution:
193 
194 generate the solution x based on the information of z and g
195 (!!!!we assume that g has been computed as the gradient of z!!!!)
196 
197 */
198 int generateSolution(double *x, double *z, double *gap,
199  double *v, double *Av,
200  double *g, double *s, int *S,
201  double lambda, int nn);
202 
203 void restartMapping(double *g, double *z, double * v,
204  double lambda, int nn);
205 
206 /*
208 
209 Our objective is to solve the fused Lasso signal approximator (flsa) problem:
210 
211 min_x g(x) 1/2 \|x-v\|^2 + lambda \|A x\|_1, (1)
212 
213 Let x* be the solution (which is unique), it satisfies
214 
215 0 in x* - v + A^T * lambda *SGN(Ax*) (2)
216 
217 To solve x*, it suffices to find
218 
219 y* in A^T * lambda *SGN(Ax*) (3)
220 that satisfies
221 
222 x* - v + y* =0 (4)
223 which leads to
224 x*= v - y* (5)
225 
226 Due to the uniqueness of x*, we conclude that y* is unique.
227 
228 As y* is a subgradient of lambda \|A x*\|_1,
229 we name our method as Subgradient Finding Algorithm (sfa).
230 
231 y* in (3) can be further written as
232 
233 y*= A^T * z* (6)
234 where
235 
236 z* in lambda* SGN (Ax*) (7)
237 
238 From (6), we have
239 z* = (A A^T)^{-1} A * y* (8)
240 
241 Therefore, from the uqniueness of y*, we conclude that z* is also unique.
242 Next, we discuss how to solve this unique z*.
243 
244 The problem (1) can reformulated as the following equivalent problem:
245 
246 min_x max_z f(x, z)= 1/2 \|x-v\|^2 + <A x, z>
247 subject to \|z\|_{infty} \leq lambda (9)
248 
249 At the saddle point, we have
250 
251 x = v - AT z, (10)
252 
253 which somehow concides with (5) and (6)
254 
255 Plugging (10) into (9), we obtain the problem
256 
257 min_z 1/2 z^T A AT z - < z, A v>,
258 subject to \|z\|_{infty} \leq lambda, (11)
259 
260 In this program, we apply the Nesterov's method for solving (11).
261 
262 
263 Duality gap:
264 
265 At a given point z0, we compute x0= v - A^T z0.
266 It is easy to show that
267 min_x f(x, z0) = f(x0, z0) <= max_z f(x0, z) (12)
268 
269 Moreover, we have
270 max_z f(x0, z) - min_x f(x, z0)
271 <= lambda * \|A x0\|_1 - < z0, Av - A A^T z0> (13)
272 
273 It is also to get that
274 
275 f(x0, z0) <= f(x*, z*) <= max_z f(x0, z) (14)
276 
277 g(x*)=f(x*, z*) (15)
278 
279 g(x0)=max_z f(x0, z) (17)
280 
281  Therefore, we have
282 
283 g(x0)-g(x*) <= lambda * \|A x0\|_1 - < z0, Av - A A^T z0> (18)
284 
285 
286  We have applied a restarting technique, which is quite involved; and thus, we do not explain here.
287 
289  */
290 
291 
292  /*
294 
295  For sfa, the stepsize of the Nesterov's method is fixed to 1/4, so that no line search is needed.
296 
297 
298 
299  Explanation of the parameters:
300 
301  Output parameters
302  x: the solution to the primal problem
303  gap: the duality gap (pointer)
304 
305  Input parameters
306  z: the solution to the dual problem (before calling this function, z contains a starting point)
307  !!!!we assume that the starting point has been successfully initialized in z !!!!
308  z0: a variable used for multiple purposes:
309  1) the previous solution z0
310  2) the difference between z and z0, i.e., z0=z- z0
311 
312  lambda: the regularization parameter (and the radius of the infity ball, see (11)).
313  nn: the length of z, z0, Av, g, and s
314  maxStep: the maximal number of iterations
315 
316  v: the point to be projected (not changed after the program)
317  Av: A*v (not changed after the program)
318 
319  s: the search point (used for multiple purposes)
320  g: the gradient at g (and it is also used for multiple purposes)
321 
322  tol: the tolerance of the gap
323  tau: the duality gap or the restarting technique is done every tau steps
324  flag: if flag=1, we apply the resart technique
325  flag=2, just run the SFA algorithm, terminate it when the absolution change is less than tol
326  flag=3, just run the SFA algorithm, terminate it when the duality gap is less than tol
327  flag=4, just run the SFA algorithm, terminate it when the relative duality gap is less than tol
328 
329 
330  We would like to emphasis that the following assumptions
331  have been checked in the functions that call this function:
332  1) 0< lambda < z_max
333  2) nn >=2
334  3) z has been initialized with a starting point
335  4) z0 has been initialized with all zeros
336 
337  The termination condition is checked every tau iterations.
338 
339  For the duality gap, please refer to (12-18)
340  */
341 int sfa(double *x, double *gap, int * activeS,
342  double *z, double *z0, double * v, double * Av,
343  double lambda, int nn, int maxStep,
344  double *s, double *g,
345  double tol, int tau, int flag);
346 
347 /*
348 
349  Refer to sfa for the defintions of the variables
350 
351  In this file, we restart the program every step, and neglect the gradient step.
352 
353  It seems that, this program does not converge.
354 
355  This function shows that the gradient step is necessary.
356  */
357 int sfa_special(double *x, double *gap, int * activeS,
358  double *z, double * v, double * Av,
359  double lambda, int nn, int maxStep,
360  double *s, double *g,
361  double tol, int tau);
362 
363 /*
364  We do one gradient descent, and then restart the program
365  */
366 int sfa_one(double *x, double *gap, int * activeS,
367  double *z, double * v, double * Av,
368  double lambda, int nn, int maxStep,
369  double *s, double *g,
370  double tol, int tau);
371 #endif /* ----- #ifndef SFA_SLEP ----- */
372 

SHOGUN Machine Learning Toolbox - Documentation