Feedforward Network for Classification

A feedforward network, or multi-layer perceptron, defines a mapping \(y = f(\mathbf{x};\boldsymbol{\theta})\) from an input \(\mathbf{x}\) to a category \(y\), and learns the values of the parameters \(\boldsymbol{\theta}\) by iterative training until it arrives at the best function approximation. The network is a directed acyclic graph composed of an input layer, an output layer and a few hidden layers.

For example,

\[f(\mathbf{x}) = f^{(3)}(f^{(2)}(f^{(1)}(\mathbf{x})))\]

where \(\mathbf{x}\) is the input layer, \(f^{(1)}\) and \(f^{(2)}\) are hidden layers, and \(f^{(3)}\) is the output layer.

See Chapter 6 of [GBC16] for a detailed introduction.
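To make the composition concrete, here is a minimal sketch of such a three-layer forward pass in plain NumPy (not Shogun code; the dimensions, weights and activations are made up for illustration):

import numpy as np

rng = np.random.default_rng(0)

# made-up dimensions: 4 inputs, 10 hidden units per layer, 2 output classes
W1, b1 = rng.standard_normal((10, 4)), np.zeros(10)
W2, b2 = rng.standard_normal((10, 10)), np.zeros(10)
W3, b3 = rng.standard_normal((2, 10)), np.zeros(2)

def f1(x):  # first hidden layer: affine map followed by ReLU
    return np.maximum(0, W1 @ x + b1)

def f2(h):  # second hidden layer: affine map followed by ReLU
    return np.maximum(0, W2 @ h + b2)

def f3(h):  # output layer: softmax over the 2 classes
    z = W3 @ h + b3
    e = np.exp(z - z.max())
    return e / e.sum()

x = rng.standard_normal(4)
y = f3(f2(f1(x)))  # f(x) = f^(3)(f^(2)(f^(1)(x))), a distribution over 2 classes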

Example

Imagine we have files with training and test data. We create CDenseFeatures (here 64-bit floats, aka RealFeatures) and CBinaryLabels as follows:

features_train = RealFeatures(f_feats_train)
features_test = RealFeatures(f_feats_test)
labels_train = BinaryLabels(f_labels_train)
labels_test = BinaryLabels(f_labels_test)
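Here f_feats_train, f_feats_test, f_labels_train and f_labels_test are file objects that are assumed to already exist. If the data were stored in CSV files, they could be created with CCSVFile along these lines (the file names are placeholders):

f_feats_train = CSVFile("feats_train.csv")
f_feats_test = CSVFile("feats_test.csv")
f_labels_train = CSVFile("labels_train.csv")
f_labels_test = CSVFile("labels_test.csv")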

We create instances of CNeuralInputLayer, CNeuralLinearLayer and CNeuralSoftmaxLayer, which are the building blocks of CNeuralNetwork.

num_feats = features_train.get_num_features()
layers = NeuralLayers()
layers = layers.input(num_feats)
layers = layers.rectified_linear(10)
layers = layers.softmax(2)
all_layers = layers.done()
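The CNeuralLayers builder used above is a convenience; the same architecture could presumably be assembled from the individual layer classes directly. A sketch of the equivalent manual construction (class names as in Shogun's Python API; a sketch rather than a definitive recipe):

all_layers = DynamicObjectArray()
all_layers.append_element(NeuralInputLayer(num_feats))        # input layer of width num_feats
all_layers.append_element(NeuralRectifiedLinearLayer(10))     # hidden layer with 10 ReLU units
all_layers.append_element(NeuralSoftmaxLayer(2))              # softmax output over 2 classes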

We create a CNeuralNetwork instance from the above layers and randomly initialize the network parameters by sampling from a Gaussian distribution.

network = NeuralNetwork(all_layers)
network.quick_connect()
network.initialize_neural_network()
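quick_connect() wires the layers in the order they were added, i.e. as a simple chain. For a more general directed acyclic graph, the layers would instead be connected explicitly; a sketch, assuming CNeuralNetwork's connect(i, j) feeds the output of layer i into layer j by index:

network = NeuralNetwork(all_layers)
network.connect(0, 1)  # input layer -> hidden layer (assumed semantics of connect)
network.connect(1, 2)  # hidden layer -> output layer
network.initialize_neural_network()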

Before training, we need to set appropriate parameters such as the regularization coefficient, dropout probabilities, learning rate, etc., as shown below. More parameters can be found in the documentation of CNeuralNetwork.

network.set_l2_coefficient(0.01)
network.set_dropout_hidden(0.5)
network.set_max_num_epochs(50)
network.set_gd_mini_batch_size(num_feats)
network.set_gd_learning_rate(0.1)
network.set_gd_momentum(0.9)
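For reference, the last two settings correspond to the standard momentum update of mini-batch gradient descent: with learning rate \(\eta = 0.1\) and momentum \(\mu = 0.9\), each step updates

\[\mathbf{v} \leftarrow \mu \mathbf{v} - \eta \nabla_{\boldsymbol{\theta}} L(\boldsymbol{\theta}), \qquad \boldsymbol{\theta} \leftarrow \boldsymbol{\theta} + \mathbf{v},\]

where \(L\) is the training loss including the \(\ell_2\) weight penalty weighted by the coefficient 0.01 set above.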

We train the model and apply it to some test data.

network.set_labels(labels_train)
network.train(features_train)
labels_predict = network.apply_binary(features_test)
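The returned CBinaryLabels object holds both the predicted classes and real-valued confidence scores; assuming the usual Shogun label accessors, they could be inspected like this:

print(labels_predict.get_labels())  # hard predictions, +1/-1
print(labels_predict.get_values())  # real-valued confidence scores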

We can extract the parameters of the trained network.

parameters = network.get_parameters()

Finally, we compute accuracy.

am = AccuracyMeasure()
accuracy = am.evaluate(labels_predict, labels_test)

References

Wikipedia: Artificial neural network, https://en.wikipedia.org/wiki/Artificial_neural_network

[GBC16] I. Goodfellow, Y. Bengio, and A. Courville. Deep Learning. MIT Press, 2016.