NeuralNetwork.cpp
/*
 * Copyright (c) 2014, Shogun Toolbox Foundation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Written (W) 2014 Khaled Nasr
 */

#include <shogun/neuralnets/NeuralNetwork.h>
#include <shogun/neuralnets/NeuralLayer.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/optimization/lbfgs/lbfgs.h>

using namespace shogun;
CNeuralNetwork::CNeuralNetwork()
: CMachine()
{
	init();
}

CNeuralNetwork::CNeuralNetwork(CDynamicObjectArray* layers) : CMachine()
{
	init();
	set_layers(layers);
}

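/* Stores the given array of layers, allocates the adjacency matrix that
 * describes the connections between them, and counts the number of network
 * inputs by summing the sizes of all input layers.
 */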
void CNeuralNetwork::set_layers(CDynamicObjectArray* layers)
{
	SG_REF(layers);
	SG_UNREF(m_layers);
	m_layers = layers;

	m_num_layers = m_layers->get_num_elements();
	m_adj_matrix = SGMatrix<bool>(m_num_layers, m_num_layers);
	m_adj_matrix.zero();

	m_num_inputs = 0;
	for (int32_t i=0; i<m_num_layers; i++)
	{
		if (get_layer(i)->is_input())
			m_num_inputs += get_layer(i)->get_num_neurons();
	}
}

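/* Connects layer i to layer j, with i feeding its activations into j. Only
 * forward connections (i<j) are allowed, so the network remains acyclic.
 */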
void CNeuralNetwork::connect(int32_t i, int32_t j)
{
	REQUIRE(i<j, "i(%i) must be less than j(%i)\n", i, j);
	m_adj_matrix(i,j) = true;
}

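/* Connects each layer to the one directly after it, forming a simple
 * feed-forward chain.
 */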
void CNeuralNetwork::quick_connect()
{
	m_adj_matrix.zero();
	for (int32_t i=1; i<m_num_layers; i++)
		m_adj_matrix(i-1, i) = true;
}

void CNeuralNetwork::disconnect(int32_t i, int32_t j)
{
	m_adj_matrix(i,j) = false;
}

void CNeuralNetwork::disconnect_all()
{
	m_adj_matrix.zero();
}

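/* Initializes the network: tells each non-input layer which layers feed into
 * it (based on the adjacency matrix), allocates a single flat parameter
 * vector for the whole network, and lets each layer initialize its own
 * section of it using random values with standard deviation sigma.
 */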
void CNeuralNetwork::initialize(float64_t sigma)
{
	for (int32_t j=0; j<m_num_layers; j++)
	{
		if (!get_layer(j)->is_input())
		{
			int32_t num_inputs = 0;
			for (int32_t i=0; i<m_num_layers; i++)
				num_inputs += m_adj_matrix(i,j);

			SGVector<int32_t> input_indices(num_inputs);

			int32_t k = 0;
			for (int32_t i=0; i<m_num_layers; i++)
			{
				if (m_adj_matrix(i,j))
				{
					input_indices[k] = i;
					k++;
				}
			}

			get_layer(j)->initialize(m_layers, input_indices);
		}
	}

	m_index_offsets = SGVector<int32_t>(m_num_layers);

	m_total_num_parameters = get_layer(0)->get_num_parameters();
	m_index_offsets[0] = 0;
	for (int32_t i=1; i<m_num_layers; i++)
	{
		m_index_offsets[i] =
			m_index_offsets[i-1] + get_layer(i-1)->get_num_parameters();
		m_total_num_parameters += get_layer(i)->get_num_parameters();
	}

	m_params = SGVector<float64_t>(m_total_num_parameters);
	m_param_regularizable = SGVector<bool>(m_total_num_parameters);

	m_params.zero();
	m_param_regularizable.set_const(true);

	for (int32_t i=0; i<m_num_layers; i++)
	{
		SGVector<float64_t> layer_param = get_section(m_params, i);
		SGVector<bool> layer_param_regularizable =
			get_section(m_param_regularizable, i);

		get_layer(i)->initialize_parameters(layer_param,
			layer_param_regularizable, sigma);

		get_layer(i)->set_batch_size(m_batch_size);
	}
}

CNeuralNetwork::~CNeuralNetwork()
{
	SG_UNREF(m_layers);
}

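/* Applies the network to the given features and interprets the output
 * activations as binary labels: with a single output neuron the decision
 * threshold is 0.5, with two output neurons the larger activation wins.
 */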
CBinaryLabels* CNeuralNetwork::apply_binary(CFeatures* data)
{
	SGMatrix<float64_t> output_activations = forward_propagate(data);
	SGVector<float64_t> labels_vec(m_batch_size);

	for (int32_t i=0; i<m_batch_size; i++)
	{
		if (get_num_outputs()==1)
		{
			if (output_activations[i]>0.5) labels_vec[i] = 1;
			else labels_vec[i] = -1;
		}
		else if (get_num_outputs()==2)
		{
			if (output_activations[2*i]>output_activations[2*i+1])
				labels_vec[i] = 1;
			else labels_vec[i] = -1;
		}
	}

	return new CBinaryLabels(labels_vec);
}

CRegressionLabels* CNeuralNetwork::apply_regression(CFeatures* data)
{
	SGMatrix<float64_t> output_activations = forward_propagate(data);
	SGVector<float64_t> labels_vec(m_batch_size);

	for (int32_t i=0; i<m_batch_size; i++)
		labels_vec[i] = output_activations[i];

	return new CRegressionLabels(labels_vec);
}

CMulticlassLabels* CNeuralNetwork::apply_multiclass(CFeatures* data)
{
	SGMatrix<float64_t> output_activations = forward_propagate(data);
	SGVector<float64_t> labels_vec(m_batch_size);

	for (int32_t i=0; i<m_batch_size; i++)
	{
		labels_vec[i] = SGVector<float64_t>::arg_max(
			output_activations.matrix+i*get_num_outputs(), 1, get_num_outputs());
	}

	return new CMulticlassLabels(labels_vec);
}

CDenseFeatures<float64_t>* CNeuralNetwork::transform(
	CDenseFeatures<float64_t>* data)
{
	SGMatrix<float64_t> output_activations = forward_propagate(data);
	return new CDenseFeatures<float64_t>(output_activations);
}

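/* Entry point for training (called through CMachine::train). Converts the
 * features and labels into matrices, sets the dropout probability of each
 * layer, and dispatches to gradient descent or L-BFGS depending on
 * optimization_method.
 */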
bool CNeuralNetwork::train_machine(CFeatures* data)
{
	REQUIRE(max_num_epochs>=0,
		"Maximum number of epochs (%i) must be >= 0\n", max_num_epochs);

	SGMatrix<float64_t> inputs = features_to_matrix(data);
	SGMatrix<float64_t> targets = labels_to_matrix(m_labels);

	for (int32_t i=0; i<m_num_layers-1; i++)
	{
		get_layer(i)->dropout_prop =
			get_layer(i)->is_input() ? dropout_input : dropout_hidden;
	}
	get_layer(m_num_layers-1)->dropout_prop = 0.0;

	m_is_training = true;
	for (int32_t i=0; i<m_num_layers; i++)
		get_layer(i)->is_training = true;

	bool result = false;
	if (optimization_method==NNOM_GRADIENT_DESCENT)
		result = train_gradient_descent(inputs, targets);
	else if (optimization_method==NNOM_LBFGS)
		result = train_lbfgs(inputs, targets);

	for (int32_t i=0; i<m_num_layers; i++)
		get_layer(i)->is_training = false;
	m_is_training = false;

	return result;
}

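/* Mini-batch gradient descent with momentum. The learning rate is multiplied
 * by gd_learning_rate_decay before each mini-batch, the per-batch errors are
 * exponentially smoothed, and training stops when the relative decrease of
 * the smoothed error falls below epsilon or max_num_epochs is reached.
 */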
bool CNeuralNetwork::train_gradient_descent(SGMatrix<float64_t> inputs,
	SGMatrix<float64_t> targets)
{
	REQUIRE(gd_learning_rate>0,
		"Gradient descent learning rate (%f) must be > 0\n", gd_learning_rate);
	REQUIRE(gd_momentum>=0,
		"Gradient descent momentum (%f) must be >= 0\n", gd_momentum);

	int32_t training_set_size = inputs.num_cols;
	if (gd_mini_batch_size==0) gd_mini_batch_size = training_set_size;
	set_batch_size(gd_mini_batch_size);

	int32_t n_param = get_num_parameters();
	SGVector<float64_t> gradients(n_param);

	// needed for momentum
	SGVector<float64_t> param_updates(n_param);
	param_updates.zero();

	float64_t error_last_time = -1.0, error = -1.0;

	float64_t c = gd_error_damping_coeff;
	if (c==-1.0)
		c = 0.99*(float64_t)gd_mini_batch_size/training_set_size + 1e-2;

	bool continue_training = true;
	float64_t alpha = gd_learning_rate;

	for (int32_t i=0; continue_training; i++)
	{
		if (max_num_epochs!=0)
			if (i>=max_num_epochs) break;

		for (int32_t j=0; j < training_set_size; j += gd_mini_batch_size)
		{
			alpha = gd_learning_rate_decay*alpha;

			if (j+gd_mini_batch_size>training_set_size)
				j = training_set_size-gd_mini_batch_size;

			SGMatrix<float64_t> targets_batch(targets.matrix+j*get_num_outputs(),
				get_num_outputs(), gd_mini_batch_size, false);

			SGMatrix<float64_t> inputs_batch(inputs.matrix+j*m_num_inputs,
				m_num_inputs, gd_mini_batch_size, false);

			for (int32_t k=0; k<n_param; k++)
				m_params[k] += gd_momentum*param_updates[k];

			float64_t e = compute_gradients(inputs_batch, targets_batch, gradients);

			// filter the errors
			if (error==-1.0)
				error = e;
			else
				error = (1.0-c) * error + c*e;

			for (int32_t k=0; k<n_param; k++)
			{
				param_updates[k] = gd_momentum*param_updates[k]
					-alpha*gradients[k];

				m_params[k] -= alpha*gradients[k];
			}

			if (error_last_time!=-1.0)
			{
				float64_t error_change = (error_last_time-error)/error;
				if (error_change<epsilon && error_change>=0)
				{
					SG_INFO("Gradient Descent Optimization Converged\n");
					continue_training = false;
					break;
				}

				SG_INFO("Epoch %i: Error = %f\n", i, error);
			}
			error_last_time = error;
		}
	}

	return true;
}

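/* Trains the network with the L-BFGS optimizer. The whole training set is
 * used as a single batch; lbfgs() calls back into lbfgs_evaluate() for the
 * error and gradients and into lbfgs_progress() to report progress.
 */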
bool CNeuralNetwork::train_lbfgs(const SGMatrix<float64_t> inputs,
	const SGMatrix<float64_t> targets)
{
	int32_t training_set_size = inputs.num_cols;
	set_batch_size(training_set_size);

	lbfgs_parameter_t lbfgs_param;
	lbfgs_parameter_init(&lbfgs_param);
	lbfgs_param.max_iterations = max_num_epochs;
	lbfgs_param.epsilon = 0;
	lbfgs_param.past = 1;
	lbfgs_param.delta = epsilon;

	m_lbfgs_temp_inputs = &inputs;
	m_lbfgs_temp_targets = &targets;

	int32_t result = lbfgs(m_total_num_parameters,
		m_params,
		NULL,
		&CNeuralNetwork::lbfgs_evaluate,
		&CNeuralNetwork::lbfgs_progress,
		this,
		&lbfgs_param);

	m_lbfgs_temp_inputs = NULL;
	m_lbfgs_temp_targets = NULL;

	if (result==LBFGS_SUCCESS)
	{
		SG_INFO("L-BFGS Optimization Converged\n");
	}
	else if (result==LBFGSERR_MAXIMUMITERATION)
	{
		SG_INFO("L-BFGS Max Number of Epochs reached\n");
	}
	else
	{
		SG_INFO("L-BFGS optimization ended with return code %i\n", result);
	}
	return true;
}

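/* Callback used by lbfgs(): computes the error for the current parameters
 * and writes the gradients into the buffer provided by the optimizer.
 */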
float64_t CNeuralNetwork::lbfgs_evaluate(void* userdata,
	const float64_t* W,
	float64_t* grad,
	const int32_t n,
	const float64_t step)
{
	CNeuralNetwork* network = static_cast<CNeuralNetwork*>(userdata);

	SGVector<float64_t> grad_vector(grad, network->get_num_parameters(), false);

	return network->compute_gradients(*network->m_lbfgs_temp_inputs,
		*network->m_lbfgs_temp_targets, grad_vector);
}

int CNeuralNetwork::lbfgs_progress(void* instance,
	const float64_t* x,
	const float64_t* g,
	const float64_t fx,
	const float64_t xnorm,
	const float64_t gnorm,
	const float64_t step,
	int n, int k, int ls)
{
	SG_SINFO("Epoch %i: Error = %f\n", k, fx);
	return 0;
}

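/* Forward propagation: computes the activations of every layer up to layer j
 * (or the output layer if j is -1), feeding each layer either the raw inputs
 * or the activations of the layers connected to it, and applying dropout.
 */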
SGMatrix<float64_t> CNeuralNetwork::forward_propagate(CFeatures* data, int32_t j)
{
	SGMatrix<float64_t> inputs = features_to_matrix(data);
	set_batch_size(inputs.num_cols);
	return forward_propagate(inputs, j);
}

SGMatrix<float64_t> CNeuralNetwork::forward_propagate(
	SGMatrix<float64_t> inputs, int32_t j)
{
	if (j==-1)
		j = m_num_layers-1;

	for (int32_t i=0; i<=j; i++)
	{
		CNeuralLayer* layer = get_layer(i);

		if (layer->is_input())
			layer->compute_activations(inputs);
		else
			layer->compute_activations(get_section(m_params, i), m_layers);

		layer->dropout_activations();
	}

	return get_layer(j)->get_activations();
}

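/* Backpropagation: runs a forward pass, clears the activation gradients, then
 * walks the layers backwards letting each one compute the gradients of the
 * error with respect to its parameters. Adds the gradients of the L1/L2
 * regularization terms, clips the layer parameters to max_norm if enabled,
 * and returns the error on the given batch.
 */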
float64_t CNeuralNetwork::compute_gradients(SGMatrix<float64_t> inputs,
	SGMatrix<float64_t> targets, SGVector<float64_t> gradients)
{
	forward_propagate(inputs);

	for (int32_t i=0; i<m_num_layers; i++)
		get_layer(i)->get_activation_gradients().zero();

	for (int32_t i=m_num_layers-1; i>=0; i--)
	{
		if (i==m_num_layers-1)
			get_layer(i)->compute_gradients(get_section(m_params,i), targets,
				m_layers, get_section(gradients,i));
		else
			get_layer(i)->compute_gradients(get_section(m_params,i),
				SGMatrix<float64_t>(), m_layers, get_section(gradients,i));
	}

	// L2 regularization
	if (l2_coefficient != 0.0)
	{
		for (int32_t i=0; i<m_total_num_parameters; i++)
		{
			if (m_param_regularizable[i])
				gradients[i] += l2_coefficient*m_params[i];
		}
	}

	// L1 regularization
	if (l1_coefficient != 0.0)
	{
		for (int32_t i=0; i<m_total_num_parameters; i++)
		{
			if (m_param_regularizable[i])
				gradients[i] +=
					l1_coefficient*CMath::sign<float64_t>(m_params[i]);
		}
	}

	// max-norm regularization
	if (max_norm != -1.0)
	{
		for (int32_t i=0; i<m_num_layers; i++)
		{
			SGVector<float64_t> layer_params = get_section(m_params,i);
			get_layer(i)->enforce_max_norm(layer_params, max_norm);
		}
	}

	return compute_error(targets);
}

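/* Error of the output layer for the given targets, plus the L1/L2
 * regularization penalties on the regularizable parameters.
 */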
float64_t CNeuralNetwork::compute_error(SGMatrix<float64_t> targets)
{
	float64_t error = get_layer(m_num_layers-1)->compute_error(targets);

	// L2 regularization
	if (l2_coefficient != 0.0)
	{
		for (int32_t i=0; i<m_total_num_parameters; i++)
		{
			if (m_param_regularizable[i])
				error += 0.5*l2_coefficient*m_params[i]*m_params[i];
		}
	}

	// L1 regularization
	if (l1_coefficient != 0.0)
	{
		for (int32_t i=0; i<m_total_num_parameters; i++)
		{
			if (m_param_regularizable[i])
				error += l1_coefficient*CMath::abs(m_params[i]);
		}
	}

	return error;
}

float64_t CNeuralNetwork::compute_error(SGMatrix<float64_t> inputs,
	SGMatrix<float64_t> targets)
{
	forward_propagate(inputs);
	return compute_error(targets);
}

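/* Numerical gradient check: perturbs each parameter, approximates the
 * gradient on a random input/target pair using central differences, and
 * returns the mean absolute difference between the numerical gradients and
 * the ones computed by backpropagation.
 */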
float64_t CNeuralNetwork::check_gradients(float64_t approx_epsilon, float64_t s)
{
	// some random inputs and outputs
	SGMatrix<float64_t> x(m_num_inputs, 1);
	SGMatrix<float64_t> y(get_num_outputs(), 1);

	for (int32_t i=0; i<x.num_rows; i++)
		x[i] = CMath::random(0.0,1.0);

	// the outputs are set up in the form of a probability distribution (in case
	// that is required by the output layer, i.e. softmax)
	for (int32_t i=0; i<y.num_rows; i++)
		y[i] = CMath::random(0.0,1.0);

	float64_t y_sum = SGVector<float64_t>::sum(y.matrix, y.num_rows);
	for (int32_t i=0; i<y.num_rows; i++)
		y[i] /= y_sum;

	set_batch_size(1);

	// numerically compute gradients
	SGVector<float64_t> gradients_numerical(m_total_num_parameters);

	for (int32_t i=0; i<m_total_num_parameters; i++)
	{
		float64_t c =
			CMath::max<float64_t>(CMath::abs(approx_epsilon*m_params[i]),s);

		m_params[i] += c;
		float64_t error_plus = compute_error(x,y);
		m_params[i] -= 2*c;
		float64_t error_minus = compute_error(x,y);
		m_params[i] += c;

		gradients_numerical[i] = (error_plus-error_minus)/(2*c);
	}

	// compute gradients using backpropagation
	SGVector<float64_t> gradients_backprop(m_total_num_parameters);
	compute_gradients(x, y, gradients_backprop);

	float64_t sum = 0.0;
	for (int32_t i=0; i<m_total_num_parameters; i++)
	{
		sum += CMath::abs(gradients_backprop[i]-gradients_numerical[i]);
	}

	return sum/m_total_num_parameters;
}

void CNeuralNetwork::set_batch_size(int32_t batch_size)
{
	if (batch_size!=m_batch_size)
	{
		m_batch_size = batch_size;
		for (int32_t i=0; i<m_num_layers; i++)
			get_layer(i)->set_batch_size(batch_size);
	}
}

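/* Checks that the given features are dense real-valued features with the
 * right number of dimensions and returns their feature matrix.
 */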
SGMatrix<float64_t> CNeuralNetwork::features_to_matrix(CFeatures* features)
{
	REQUIRE(features != NULL, "Invalid (NULL) feature pointer\n");
	REQUIRE(features->get_feature_type() == F_DREAL,
		"Feature type must be F_DREAL\n");
	REQUIRE(features->get_feature_class() == C_DENSE,
		"Feature class must be C_DENSE\n");

	CDenseFeatures<float64_t>* inputs = (CDenseFeatures<float64_t>*) features;
	REQUIRE(inputs->get_num_features()==m_num_inputs,
		"Number of features (%i) must match the network's number of inputs "
		"(%i)\n", inputs->get_num_features(), get_num_inputs());

	return inputs->get_feature_matrix();
}

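/* Converts labels into a target matrix with one column per example: one-hot
 * encoding for multiclass labels, one or two indicator outputs for binary
 * labels, and the raw values for regression labels.
 */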
SGMatrix<float64_t> CNeuralNetwork::labels_to_matrix(CLabels* labs)
{
	REQUIRE(labs != NULL, "Invalid (NULL) labels pointer\n");

	SGMatrix<float64_t> targets(get_num_outputs(), labs->get_num_labels());
	targets.zero();

	if (labs->get_label_type() == LT_MULTICLASS)
	{
		CMulticlassLabels* labels_mc = (CMulticlassLabels*) labs;
		REQUIRE(labels_mc->get_num_classes()==get_num_outputs(),
			"Number of classes (%i) must match the network's number of "
			"outputs (%i)\n", labels_mc->get_num_classes(), get_num_outputs());

		for (int32_t i=0; i<labels_mc->get_num_labels(); i++)
			targets[((int32_t)labels_mc->get_label(i))+ i*get_num_outputs()]
				= 1.0;
	}
	else if (labs->get_label_type() == LT_BINARY)
	{
		CBinaryLabels* labels_bin = (CBinaryLabels*) labs;
		if (get_num_outputs()==1)
		{
			for (int32_t i=0; i<labels_bin->get_num_labels(); i++)
				targets[i] = (labels_bin->get_label(i)==1);
		}
		else if (get_num_outputs()==2)
		{
			for (int32_t i=0; i<labels_bin->get_num_labels(); i++)
			{
				targets[i*2] = (labels_bin->get_label(i)==1);
				targets[i*2+1] = (labels_bin->get_label(i)==-1);
			}
		}
	}
	else if (labs->get_label_type() == LT_REGRESSION)
	{
		CRegressionLabels* labels_reg = (CRegressionLabels*) labs;
		for (int32_t i=0; i<labels_reg->get_num_labels(); i++)
			targets[i] = labels_reg->get_label(i);
	}

	return targets;
}

EProblemType CNeuralNetwork::get_machine_problem_type() const
{
	// problem type depends on the type of labels given to the network
	// if no labels are given yet, just return PT_MULTICLASS
	if (m_labels==NULL)
		return PT_MULTICLASS;

	if (m_labels->get_label_type() == LT_BINARY)
		return PT_BINARY;
	else if (m_labels->get_label_type() == LT_REGRESSION)
		return PT_REGRESSION;
	else return PT_MULTICLASS;
}

bool CNeuralNetwork::is_label_valid(CLabels* lab) const
{
	return (lab->get_label_type() == LT_MULTICLASS ||
		lab->get_label_type() == LT_BINARY ||
		lab->get_label_type() == LT_REGRESSION);
}

void CNeuralNetwork::set_labels(CLabels* lab)
{
	if (lab->get_label_type() == LT_BINARY)
	{
		REQUIRE(get_num_outputs() <= 2, "Cannot use %s in a neural network "
			"with more than 2 output neurons\n", lab->get_name());
	}
	else if (lab->get_label_type() == LT_REGRESSION)
	{
		REQUIRE(get_num_outputs() == 1, "Cannot use %s in a neural network "
			"with more than 1 output neuron\n", lab->get_name());
	}

	CMachine::set_labels(lab);
}

SGVector<float64_t>* CNeuralNetwork::get_layer_parameters(int32_t i)
{
	REQUIRE(i<m_num_layers && i >= 0, "Layer index (%i) out of range\n", i);

	int32_t n = get_layer(i)->get_num_parameters();
	SGVector<float64_t>* p = new SGVector<float64_t>(n);

	memcpy(p->vector, get_section(m_params, i), n*sizeof(float64_t));
	return p;
}

CNeuralLayer* CNeuralNetwork::get_layer(int32_t i)
{
	CNeuralLayer* layer = (CNeuralLayer*)m_layers->element(i);
	// needed because m_layers->element(i) increases the reference count of
	// layer i
	SG_UNREF(layer);
	return layer;
}

template <class T>
SGVector<T> CNeuralNetwork::get_section(SGVector<T> v, int32_t i)
{
	return SGVector<T>(v.vector+m_index_offsets[i],
		get_layer(i)->get_num_parameters(), false);
}

int32_t CNeuralNetwork::get_num_outputs()
{
	return get_layer(m_num_layers-1)->get_num_neurons();
}

CDynamicObjectArray* CNeuralNetwork::get_layers()
{
	SG_REF(m_layers);
	return m_layers;
}

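/* Sets the default values of all hyperparameters and member variables and
 * registers them with the parameter framework so the network can be
 * serialized.
 */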
void CNeuralNetwork::init()
{
	optimization_method = NNOM_LBFGS;
	dropout_hidden = 0.0;
	dropout_input = 0.0;
	max_norm = -1.0;
	l2_coefficient = 0.0;
	l1_coefficient = 0.0;
	gd_mini_batch_size = 0;
	max_num_epochs = 0;
	gd_learning_rate = 0.1;
	gd_learning_rate_decay = 1.0;
	gd_momentum = 0.9;
	gd_error_damping_coeff = -1.0;
	epsilon = 1.0e-5;
	m_num_inputs = 0;
	m_num_layers = 0;
	m_layers = NULL;
	m_total_num_parameters = 0;
	m_batch_size = 1;
	m_lbfgs_temp_inputs = NULL;
	m_lbfgs_temp_targets = NULL;
	m_is_training = false;

	SG_ADD((machine_int_t*)&optimization_method, "optimization_method",
		"Optimization Method", MS_NOT_AVAILABLE);
	SG_ADD(&gd_mini_batch_size, "gd_mini_batch_size",
		"Gradient Descent Mini-batch size", MS_NOT_AVAILABLE);
	SG_ADD(&max_num_epochs, "max_num_epochs",
		"Max number of Epochs", MS_NOT_AVAILABLE);
	SG_ADD(&gd_learning_rate, "gd_learning_rate",
		"Gradient descent learning rate", MS_NOT_AVAILABLE);
	SG_ADD(&gd_learning_rate_decay, "gd_learning_rate_decay",
		"Gradient descent learning rate decay", MS_NOT_AVAILABLE);
	SG_ADD(&gd_momentum, "gd_momentum",
		"Gradient Descent Momentum", MS_NOT_AVAILABLE);
	SG_ADD(&gd_error_damping_coeff, "gd_error_damping_coeff",
		"Gradient Descent Error Damping Coeff", MS_NOT_AVAILABLE);
	SG_ADD(&epsilon, "epsilon",
		"Epsilon", MS_NOT_AVAILABLE);
	SG_ADD(&m_num_inputs, "num_inputs",
		"Number of Inputs", MS_NOT_AVAILABLE);
	SG_ADD(&m_num_layers, "num_layers",
		"Number of Layers", MS_NOT_AVAILABLE);
	SG_ADD(&m_adj_matrix, "adj_matrix",
		"Adjacency Matrix", MS_NOT_AVAILABLE);
	SG_ADD(&l2_coefficient, "l2_coefficient",
		"L2 regularization coeff", MS_NOT_AVAILABLE);
	SG_ADD(&l1_coefficient, "l1_coefficient",
		"L1 regularization coeff", MS_NOT_AVAILABLE);
	SG_ADD(&dropout_hidden, "dropout_hidden",
		"Hidden neuron dropout probability", MS_NOT_AVAILABLE);
	SG_ADD(&dropout_input, "dropout_input",
		"Input neuron dropout probability", MS_NOT_AVAILABLE);
	SG_ADD(&max_norm, "max_norm",
		"Max Norm", MS_NOT_AVAILABLE);
	SG_ADD(&m_total_num_parameters, "total_num_parameters",
		"Total number of parameters", MS_NOT_AVAILABLE);
	SG_ADD(&m_index_offsets, "index_offsets",
		"Index Offsets", MS_NOT_AVAILABLE);
	SG_ADD(&m_params, "params",
		"Parameters", MS_NOT_AVAILABLE);
	SG_ADD(&m_param_regularizable, "param_regularizable",
		"Parameter Regularizable", MS_NOT_AVAILABLE);
	SG_ADD((CSGObject**)&m_layers, "layers",
		"DynamicObjectArray of NeuralLayer objects",
		MS_NOT_AVAILABLE);
	SG_ADD(&m_is_training, "is_training",
		"is_training", MS_NOT_AVAILABLE);
}
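
/* A minimal usage sketch, assuming the layer classes from shogun/neuralnets
 * (CNeuralInputLayer, CNeuralLogisticLayer, CNeuralSoftmaxLayer) and a dense
 * multiclass training set (features, labels) are available:
 *
 *   CDynamicObjectArray* layers = new CDynamicObjectArray();
 *   layers->append_element(new CNeuralInputLayer(num_features));
 *   layers->append_element(new CNeuralLogisticLayer(16));
 *   layers->append_element(new CNeuralSoftmaxLayer(num_classes));
 *
 *   CNeuralNetwork* network = new CNeuralNetwork(layers);
 *   network->quick_connect();         // chain the layers in order
 *   network->initialize(0.01);        // random initialization, sigma = 0.01
 *
 *   network->set_labels(labels);
 *   network->train(features);
 *   CMulticlassLabels* predictions = network->apply_multiclass(features);
 */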
