SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
MultitaskKernelTreeNormalizer.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2010 Christian Widmer
8  * Copyright (C) 2010 Max-Planck-Society
9  */
10 
11 #ifndef _MULTITASKKERNELTREENORMALIZER_H___
12 #define _MULTITASKKERNELTREENORMALIZER_H___
13 
14 #include <shogun/lib/config.h>
15 
17 #include <shogun/kernel/Kernel.h>
18 #include <algorithm>
19 #include <iterator>
20 #include <map>
21 #include <set>
22 #include <deque>
23 #include <vector>
24 
25 namespace shogun
26 {
27 
32 class CNode: public CSGObject
33 {
34 public:
38  {
39  parent = NULL;
40  beta = 1.0;
41  node_id = 0;
42  }
43 
44  virtual ~CNode()
45  {
46  for (size_t i = 0; i < children.size(); i++)
47  delete children[i];
48  }
49 
53  std::set<CNode*> get_path_root()
54  {
55  std::set<CNode*> nodes_on_path = std::set<CNode*>();
56  CNode *node = this;
57  while (node != NULL) {
58  nodes_on_path.insert(node);
59  node = node->parent;
60  }
61  return nodes_on_path;
62  }
63 
67  std::vector<int32_t> get_task_ids_below()
68  {
69 
70  std::vector<int32_t> task_ids;
71  std::deque<CNode*> grey_nodes;
72  grey_nodes.push_back(this);
73 
74  while(grey_nodes.size() > 0)
75  {
76 
77  CNode *current_node = grey_nodes.front();
78  grey_nodes.pop_front();
79 
80  for(int32_t i = 0; i!=int32_t(current_node->children.size()); i++){
81  grey_nodes.push_back(current_node->children[i]);
82  }
83 
84  if(current_node->is_leaf()){
85  task_ids.push_back(current_node->getNode_id());
86  }
87  }
88 
89  return task_ids;
90  }
91 
96  {
97  node->parent = this;
98  this->children.push_back(node);
99  }
100 
102  virtual const char *get_name() const
103  {
104  return "Node";
105  }
106 
108  bool is_leaf()
109  {
110  return children.empty();
111 
112  }
113 
115  int32_t getNode_id() const
116  {
117  return node_id;
118  }
119 
121  void setNode_id(int32_t node_idx)
122  {
123  this->node_id = node_idx;
124  }
125 
128 
129 protected:
130 
133 
135  std::vector<CNode*> children;
136 
138  int32_t node_id;
139 
140 };
141 
142 
148 {
149 
150 public:
151 
155  {
156  root = new CNode();
157  nodes.push_back(root);
158 
159  name2id = std::map<std::string, int32_t>();
160  name2id["root"] = 0;
161  }
162 
163  virtual ~CTaxonomy()
164  {
165  for (size_t i = 0; i < nodes.size(); i++)
166  delete nodes[i];
167  nodes.clear();
168  name2id.clear();
169  task_histogram.clear();
170  }
171 
176  CNode* get_node(int32_t task_id) {
177  return nodes[task_id];
178  }
179 
184  {
185  nodes[0]->beta = beta;
186  }
187 
193  CNode* add_node(std::string parent_name, std::string child_name, float64_t beta)
194  {
195  if (child_name=="") SG_SERROR("child_name empty")
196  if (parent_name=="") SG_SERROR("parent_name empty")
197 
198 
199  CNode* child_node = new CNode();
200 
201  child_node->beta = beta;
202 
203  nodes.push_back(child_node);
204  int32_t id = nodes.size()-1;
205 
206  name2id[child_name] = id;
207 
208  child_node->setNode_id(id);
209 
210 
211  //create edge
212  CNode* parent = nodes[name2id[parent_name]];
213 
214  parent->add_child(child_node);
215 
216  return child_node;
217  }
218 
223  int32_t get_id(std::string name) {
224  return name2id[name];
225  }
226 
232  std::set<CNode*> intersect_root_path(CNode* node_lhs, CNode* node_rhs)
233  {
234 
235  std::set<CNode*> root_path_lhs = node_lhs->get_path_root();
236  std::set<CNode*> root_path_rhs = node_rhs->get_path_root();
237 
238  std::set<CNode*> intersection;
239 
240  std::set_intersection(root_path_lhs.begin(), root_path_lhs.end(),
241  root_path_rhs.begin(), root_path_rhs.end(),
242  std::inserter(intersection, intersection.end()));
243 
244  return intersection;
245 
246  }
247 
253  float64_t compute_node_similarity(int32_t task_lhs, int32_t task_rhs)
254  {
255 
256  CNode* node_lhs = get_node(task_lhs);
257  CNode* node_rhs = get_node(task_rhs);
258 
259  // compute intersection of paths to root
260  std::set<CNode*> intersection = intersect_root_path(node_lhs, node_rhs);
261 
262  // sum up weights
263  float64_t gamma = 0;
264  for (std::set<CNode*>::const_iterator p = intersection.begin(); p != intersection.end(); ++p) {
265 
266  gamma += (*p)->beta;
267  }
268 
269  return gamma;
270 
271  }
272 
276  void update_task_histogram(std::vector<int32_t> task_vector_lhs) {
277 
278  //empty map
279  task_histogram.clear();
280 
281 
282  //fill map with zeros
283  for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
284  {
285  task_histogram[*it] = 0.0;
286  }
287 
288  //fill map
289  for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++)
290  {
291  task_histogram[*it] += 1.0;
292  }
293 
294  //compute fractions
295  for (std::map<int32_t, float64_t>::const_iterator it=task_histogram.begin(); it!=task_histogram.end(); it++)
296  {
297  task_histogram[it->first] = task_histogram[it->first] / float64_t(task_vector_lhs.size());
298  }
299 
300  }
301 
303  int32_t get_num_nodes()
304  {
305  return (int32_t)(nodes.size());
306  }
307 
309  int32_t get_num_leaves()
310  {
311  int32_t num_leaves = 0;
312 
313  for (int32_t i=0; i!=get_num_nodes(); i++)
314  {
315  if (get_node(i)->is_leaf()==true)
316  {
317  num_leaves++;
318  }
319  }
320 
321  return num_leaves;
322  }
323 
326  {
327  CNode* node = get_node(idx);
328  return node->beta;
329  }
330 
335  void set_node_weight(int32_t idx, float64_t weight)
336  {
337  CNode* node = get_node(idx);
338  node->beta = weight;
339  }
340 
342  virtual const char* get_name() const
343  {
344  return "Taxonomy";
345  }
346 
348  std::map<std::string, int32_t> get_name2id() {
349  return name2id;
350  }
351 
357  int32_t get_id_by_name(std::string name)
358  {
359  return name2id[name];
360  }
361 
362 
363 protected:
364 
368  std::map<std::string, int32_t> name2id;
370  std::vector<CNode*> nodes;
372  std::map<int32_t, float64_t> task_histogram;
373 
374 };
375 
380 {
381 
382 public:
383 
387  {
388  }
389 
396  CMultitaskKernelTreeNormalizer(std::vector<std::string> task_lhs,
397  std::vector<std::string> task_rhs,
399  {
400 
401  taxonomy = tax;
402  set_task_vector_lhs(task_lhs);
403  set_task_vector_rhs(task_rhs);
404 
406 
407  dependency_matrix = std::vector<float64_t>(num_nodes * num_nodes);
408 
409  update_cache();
410  }
411 
412 
415  {
416  }
417 
418 
421  {
422 
423 
424  for (int32_t i=0; i!=num_nodes; i++)
425  {
426  for (int32_t j=0; j!=num_nodes; j++)
427  {
428 
429  float64_t similarity = taxonomy.compute_node_similarity(i, j);
430  set_node_similarity(i,j,similarity);
431 
432  }
433 
434  }
435  }
436 
437 
438 
444  virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs)
445  {
446  //lookup tasks
447  int32_t task_idx_lhs = task_vector_lhs[idx_lhs];
448  int32_t task_idx_rhs = task_vector_rhs[idx_rhs];
449 
450  //lookup similarity
451  float64_t task_similarity = get_node_similarity(task_idx_lhs, task_idx_rhs);
452  //float64_t task_similarity = taxonomy.compute_node_similarity(task_idx_lhs, task_idx_rhs);
453 
454  //take task similarity into account
455  float64_t similarity = (value/scale) * task_similarity;
456 
457 
458  return similarity;
459  }
460 
465  virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs)
466  {
467  SG_ERROR("normalize_lhs not implemented")
468  return 0;
469  }
470 
475  virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs)
476  {
477  SG_ERROR("normalize_rhs not implemented")
478  return 0;
479  }
480 
481 
483  void set_task_vector_lhs(std::vector<std::string> vec)
484  {
485 
486  task_vector_lhs.clear();
487 
488  for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
489  {
490  task_vector_lhs.push_back(taxonomy.get_id(vec[i]));
491  }
492 
493  //update task histogram
495 
496  }
497 
499  void set_task_vector_rhs(std::vector<std::string> vec)
500  {
501 
502  task_vector_rhs.clear();
503 
504  for (int32_t i = 0; i != (int32_t)(vec.size()); ++i)
505  {
506  task_vector_rhs.push_back(taxonomy.get_id(vec[i]));
507  }
508 
509  }
510 
512  void set_task_vector(std::vector<std::string> vec)
513  {
514  set_task_vector_lhs(vec);
515  set_task_vector_rhs(vec);
516  }
517 
519  int32_t get_num_betas()
520  {
521 
522  return taxonomy.get_num_nodes();
523 
524  }
525 
529  float64_t get_beta(int32_t idx)
530  {
531 
532  return taxonomy.get_node_weight(idx);
533 
534  }
535 
539  void set_beta(int32_t idx, float64_t weight)
540  {
541 
542  taxonomy.set_node_weight(idx, weight);
543 
544  update_cache();
545 
546  }
547 
548 
554  float64_t get_node_similarity(int32_t node_lhs, int32_t node_rhs)
555  {
556 
557  ASSERT(node_lhs < num_nodes && node_lhs >= 0)
558  ASSERT(node_rhs < num_nodes && node_rhs >= 0)
559 
560  return dependency_matrix[node_lhs * num_nodes + node_rhs];
561 
562  }
563 
569  void set_node_similarity(int32_t node_lhs, int32_t node_rhs,
570  float64_t similarity)
571  {
572 
573  ASSERT(node_lhs < num_nodes && node_lhs >= 0)
574  ASSERT(node_rhs < num_nodes && node_rhs >= 0)
575 
576  dependency_matrix[node_lhs * num_nodes + node_rhs] = similarity;
577 
578  }
579 
581  virtual const char* get_name() const
582  {
583  return "MultitaskKernelTreeNormalizer";
584  }
585 
590  {
591  return dynamic_cast<CMultitaskKernelTreeNormalizer*>(n);
592  }
593 
594 protected:
597 
599  int32_t num_nodes;
600 
602  std::vector<int32_t> task_vector_lhs;
603 
605  std::vector<int32_t> task_vector_rhs;
606 
608  std::vector<float64_t> dependency_matrix;
609 };
610 }
611 #endif
void add_child(CNode *node)
void set_task_vector_rhs(std::vector< std::string > vec)
CTaxonomy is used to describe hierarchical structure between tasks.
std::map< std::string, int32_t > get_name2id()
int32_t get_id(std::string name)
virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs)
void set_root_beta(float64_t beta)
virtual const char * get_name() const
std::map< std::string, int32_t > name2id
#define SG_ERROR(...)
Definition: SGIO.h:128
CMultitaskKernelTreeNormalizer(std::vector< std::string > task_lhs, std::vector< std::string > task_rhs, CTaxonomy tax)
virtual const char * get_name() const
std::map< int32_t, float64_t > task_histogram
A CNode is an element of a CTaxonomy, which is used to describe hierarchical structure between tasks...
void set_node_similarity(int32_t node_lhs, int32_t node_rhs, float64_t similarity)
void setNode_id(int32_t node_idx)
std::set< CNode * > get_path_root()
std::set< CNode * > intersect_root_path(CNode *node_lhs, CNode *node_rhs)
#define ASSERT(x)
Definition: SGIO.h:200
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:125
void set_task_vector_lhs(std::vector< std::string > vec)
std::vector< CNode * > children
virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs)
int32_t get_id_by_name(std::string name)
float64_t get_node_weight(int32_t idx)
double float64_t
Definition: common.h:60
CNode * add_node(std::string parent_name, std::string child_name, float64_t beta)
The class Kernel Normalizer defines a function to post-process kernel values.
CMultitaskKernelTreeNormalizer * KernelNormalizerToMultitaskKernelTreeNormalizer(CKernelNormalizer *n)
float64_t compute_node_similarity(int32_t task_lhs, int32_t task_rhs)
void set_beta(int32_t idx, float64_t weight)
Base-class for parameterized Kernel Normalizers.
CNode * get_node(int32_t task_id)
void update_task_histogram(std::vector< int32_t > task_vector_lhs)
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define SG_SERROR(...)
Definition: SGIO.h:178
void set_node_weight(int32_t idx, float64_t weight)
void set_task_vector(std::vector< std::string > vec)
virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs)
float64_t get_node_similarity(int32_t node_lhs, int32_t node_rhs)
std::vector< int32_t > get_task_ids_below()
std::vector< CNode * > nodes
The MultitaskKernel allows Multitask Learning via a modified kernel function based on taxonomy...

SHOGUN Machine Learning Toolbox - Documentation