Cross Validation on Multiple Kernel Learning

Cross Validation is a model validation technique whose purpose is to give insight into how the model we are testing will generalize to an independent dataset. Essentially, it is based on training and testing the model many times on different complementary partitions of the original training dataset and then combining the validation results (e.g. by averaging) to estimate the performance of the final predictive model.

Example

We’ll use as example a classification problem solvable by using CMKLClassification. For the sake of brevity, we’ll skip the initialization of features, kernels and so on (see Multiple Kernel Learning for a more complete example of MKL usage).

libsvm = LibSVM()
svm = MKLClassification(libsvm)
svm.set_interleaved_optimization_enabled(False)
svm.set_kernel(kernel)
libsvm = LibSVM();
svm = MKLClassification(libsvm);
svm.set_interleaved_optimization_enabled(false);
svm.set_kernel(kernel);
LibSVM libsvm = new LibSVM();
MKLClassification svm = new MKLClassification(libsvm);
svm.set_interleaved_optimization_enabled(false);
svm.set_kernel(kernel);
libsvm = Shogun::LibSVM.new 
svm = Shogun::MKLClassification.new libsvm
svm.set_interleaved_optimization_enabled false
svm.set_kernel kernel
libsvm <- LibSVM()
svm <- MKLClassification(libsvm)
svm$set_interleaved_optimization_enabled(FALSE)
svm$set_kernel(kernel)
libsvm = shogun.LibSVM()
svm = shogun.MKLClassification(libsvm)
svm:set_interleaved_optimization_enabled(False)
svm:set_kernel(kernel)
LibSVM libsvm = new LibSVM();
MKLClassification svm = new MKLClassification(libsvm);
svm.set_interleaved_optimization_enabled(false);
svm.set_kernel(kernel);
auto libsvm = some<CLibSVM>();
auto svm = some<CMKLClassification>(libsvm);
svm->set_interleaved_optimization_enabled(false);
svm->set_kernel(kernel);

Firstly, we initialize a splitting strategy CStratifiedCrossValidationSplitting, which is needed to divide the dataset into folds, and an evaluation criterion CAccuracyMeasure, to evaluate the performance of the trained models. Secondly, we create the CCrossValidation instance. We also set the number of cross validation runs.

splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)
evaluation_criterium = AccuracyMeasure()
cross = CrossValidation(svm, combined_features, labels, splitting_strategy, evaluation_criterium)
cross.set_autolock(False)
cross.set_num_runs(2)
splitting_strategy = StratifiedCrossValidationSplitting(labels, 5);
evaluation_criterium = AccuracyMeasure();
cross = CrossValidation(svm, combined_features, labels, splitting_strategy, evaluation_criterium);
cross.set_autolock(false);
cross.set_num_runs(2);
StratifiedCrossValidationSplitting splitting_strategy = new StratifiedCrossValidationSplitting(labels, 5);
AccuracyMeasure evaluation_criterium = new AccuracyMeasure();
CrossValidation cross = new CrossValidation(svm, combined_features, labels, splitting_strategy, evaluation_criterium);
cross.set_autolock(false);
cross.set_num_runs(2);
splitting_strategy = Shogun::StratifiedCrossValidationSplitting.new labels, 5
evaluation_criterium = Shogun::AccuracyMeasure.new 
cross = Shogun::CrossValidation.new svm, combined_features, labels, splitting_strategy, evaluation_criterium
cross.set_autolock false
cross.set_num_runs 2
splitting_strategy <- StratifiedCrossValidationSplitting(labels, 5)
evaluation_criterium <- AccuracyMeasure()
cross <- CrossValidation(svm, combined_features, labels, splitting_strategy, evaluation_criterium)
cross$set_autolock(FALSE)
cross$set_num_runs(2)
splitting_strategy = shogun.StratifiedCrossValidationSplitting(labels, 5)
evaluation_criterium = shogun.AccuracyMeasure()
cross = shogun.CrossValidation(svm, combined_features, labels, splitting_strategy, evaluation_criterium)
cross:set_autolock(False)
cross:set_num_runs(2)
StratifiedCrossValidationSplitting splitting_strategy = new StratifiedCrossValidationSplitting(labels, 5);
AccuracyMeasure evaluation_criterium = new AccuracyMeasure();
CrossValidation cross = new CrossValidation(svm, combined_features, labels, splitting_strategy, evaluation_criterium);
cross.set_autolock(false);
cross.set_num_runs(2);
auto splitting_strategy = some<CStratifiedCrossValidationSplitting>(labels, 5);
auto evaluation_criterium = some<CAccuracyMeasure>();
auto cross = some<CCrossValidation>(svm, combined_features, labels, splitting_strategy, evaluation_criterium);
cross->set_autolock(false);
cross->set_num_runs(2);

To also observe the results of the individual folds, we create a cross validation observer CParameterObserverCV and register it with the CCrossValidation instance.

mkl_obs = ParameterObserverCV(True)
cross.subscribe_to_parameters(mkl_obs)
mkl_obs = ParameterObserverCV(true);
cross.subscribe_to_parameters(mkl_obs);
ParameterObserverCV mkl_obs = new ParameterObserverCV(true);
cross.subscribe_to_parameters(mkl_obs);
mkl_obs = Shogun::ParameterObserverCV.new true
cross.subscribe_to_parameters mkl_obs
mkl_obs <- ParameterObserverCV(TRUE)
cross$subscribe_to_parameters(mkl_obs)
mkl_obs = shogun.ParameterObserverCV(True)
cross:subscribe_to_parameters(mkl_obs)
ParameterObserverCV mkl_obs = new ParameterObserverCV(true);
cross.subscribe_to_parameters(mkl_obs);
auto mkl_obs = some<CParameterObserverCV>(true);
cross->subscribe_to_parameters(mkl_obs);

Finally, we evaluate the model and get the results (a CCrossValidationResult instance).

result = CrossValidationResult()
result = CrossValidationResult.obtain_from_generic(cross.evaluate())
result = CrossValidationResult();
result = CrossValidationResult.obtain_from_generic(cross.evaluate());
CrossValidationResult result = new CrossValidationResult();
result = CrossValidationResult.obtain_from_generic(cross.evaluate());
result = Shogun::CrossValidationResult.new 
result = Shogun::CrossValidationResult.obtain_from_generic cross.evaluate 
result <- CrossValidationResult()
result <- CrossValidationResult$obtain_from_generic(cross$evaluate())
result = shogun.CrossValidationResult()
result = CrossValidationResult:obtain_from_generic(cross:evaluate())
CrossValidationResult result = new CrossValidationResult();
result = CrossValidationResult.obtain_from_generic(cross.evaluate());
auto result = some<CCrossValidationResult>();
result = CCrossValidationResult::obtain_from_generic(cross->evaluate());

We get the \(mean\) of all the evaluation results and its standard deviation \(stddev\).

mean = result.get_mean()
stddev = result.get_std_dev()
mean = result.get_mean();
stddev = result.get_std_dev();
double mean = result.get_mean();
double stddev = result.get_std_dev();
mean = result.get_mean 
stddev = result.get_std_dev 
mean <- result$get_mean()
stddev <- result$get_std_dev()
mean = result:get_mean()
stddev = result:get_std_dev()
double mean = result.get_mean();
double stddev = result.get_std_dev();
auto mean = result->get_mean();
auto stddev = result->get_std_dev();

We can get more information about the individual cross validation runs and folds by using the observer we registered before, for example the kernel weights. We get the CMKLClassification machine used during the first run and trained on the first fold.

obs = mkl_obs.get_observation(0)
fold = obs.get_fold(0)
machine = MKLClassification.obtain_from_generic(fold.get_trained_machine())
obs = mkl_obs.get_observation(0);
fold = obs.get_fold(0);
machine = MKLClassification.obtain_from_generic(fold.get_trained_machine());
CrossValidationStorage obs = mkl_obs.get_observation(0);
CrossValidationFoldStorage fold = obs.get_fold(0);
MKLClassification machine = MKLClassification.obtain_from_generic(fold.get_trained_machine());
obs = mkl_obs.get_observation 0
fold = obs.get_fold 0
machine = Shogun::MKLClassification.obtain_from_generic fold.get_trained_machine 
obs <- mkl_obs$get_observation(0)
fold <- obs$get_fold(0)
machine <- MKLClassification$obtain_from_generic(fold$get_trained_machine())
obs = mkl_obs:get_observation(0)
fold = obs:get_fold(0)
machine = MKLClassification:obtain_from_generic(fold:get_trained_machine())
CrossValidationStorage obs = mkl_obs.get_observation(0);
CrossValidationFoldStorage fold = obs.get_fold(0);
MKLClassification machine = MKLClassification.obtain_from_generic(fold.get_trained_machine());
auto obs = mkl_obs->get_observation(0);
auto fold = obs->get_fold(0);
auto machine = CMKLClassification::obtain_from_generic(fold->get_trained_machine());

Then, from the trained machine, we get the weights \(\mathbf{w}\) of its kernels.

k = CombinedKernel.obtain_from_generic(machine.get_kernel())
w = k.get_subkernel_weights()
k = CombinedKernel.obtain_from_generic(machine.get_kernel());
w = k.get_subkernel_weights();
CombinedKernel k = CombinedKernel.obtain_from_generic(machine.get_kernel());
DoubleMatrix w = k.get_subkernel_weights();
k = Shogun::CombinedKernel.obtain_from_generic machine.get_kernel 
w = k.get_subkernel_weights 
k <- CombinedKernel$obtain_from_generic(machine$get_kernel())
w <- k$get_subkernel_weights()
k = CombinedKernel:obtain_from_generic(machine:get_kernel())
w = k:get_subkernel_weights()
CombinedKernel k = CombinedKernel.obtain_from_generic(machine.get_kernel());
double[] w = k.get_subkernel_weights();
auto k = CCombinedKernel::obtain_from_generic(machine->get_kernel());
auto w = k->get_subkernel_weights();