SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
ECOCRandomDenseEncoder.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Chiyuan Zhang
8  * Copyright (C) 2012 Chiyuan Zhang
9  */
10 
11 #include <algorithm>
12 #include <limits>
13 
16 
17 using namespace shogun;
18 
19 CECOCRandomDenseEncoder::CECOCRandomDenseEncoder(int32_t maxiter, int32_t codelen, float64_t pposone)
20  :m_maxiter(maxiter), m_codelen(codelen), m_pposone(pposone)
21 {
22  if (!check_probability(pposone))
23  SG_ERROR("invalid probability of +1")
24 
25  init();
26 }
27 
28 void CECOCRandomDenseEncoder::init()
29 {
30  SG_ADD(&m_maxiter, "maxiter", "max number of iterations", MS_NOT_AVAILABLE);
31  SG_ADD(&m_codelen, "codelen", "code length", MS_NOT_AVAILABLE);
32  SG_ADD(&m_pposone, "pposone", "probability of +1", MS_NOT_AVAILABLE);
33 }
34 
36 {
37  if (!check_probability(pposone))
38  SG_ERROR("probability of 0, +1 and -1 must sum to one")
39 
40  m_pposone = pposone;
41 }
42 
44 {
45  int32_t codelen = m_codelen;
46  if (codelen <= 0)
47  codelen = get_default_code_length(num_classes);
48 
49 
50  SGMatrix<int32_t> best_codebook(codelen, num_classes, true);
51  int32_t best_dist = 0;
52 
53  SGMatrix<int32_t> codebook(codelen, num_classes);
54  int32_t n_iter = 0;
55  while (true)
56  {
57  // fill codebook
58  codebook.zero();
59  for (int32_t i=0; i < codelen; ++i)
60  {
61  for (int32_t j=0; j < num_classes; ++j)
62  {
63  float64_t randval = CMath::random(0.0, 1.0);
64  if (randval > m_pposone)
65  codebook(i, j) = -1;
66  else
67  codebook(i, j) = +1;
68  }
69  }
70 
71  bool valid = true;
72  for (int32_t i=0; i < codelen; ++i)
73  {
74  bool p1_occur = false, n1_occur = false;
75  for (int32_t j=0; j < num_classes; ++j)
76  if (codebook(i, j) == 1)
77  p1_occur = true;
78  else if (codebook(i, j) == -1)
79  n1_occur = true;
80 
81  if (!p1_occur || !n1_occur)
82  {
83  valid = false;
84  break;
85  }
86  }
87 
88  if (valid)
89  {
90  // see if this is a better codebook
91  // compute the minimum pairwise code distance
92  int32_t min_dist = std::numeric_limits<int32_t>::max();
93  for (int32_t i=0; i < num_classes; ++i)
94  {
95  for (int32_t j=i+1; j < num_classes; ++j)
96  {
97  int32_t dist = CECOCUtil::hamming_distance(codebook.get_column_vector(i),
98  codebook.get_column_vector(j), codelen);
99  if (dist < min_dist)
100  min_dist = dist;
101  }
102  }
103 
104  if (min_dist > best_dist)
105  {
106  best_dist = min_dist;
107  std::copy(codebook.matrix, codebook.matrix + codelen*num_classes,
108  best_codebook.matrix);
109  }
110  }
111 
112  if (++n_iter >= m_maxiter)
113  if (best_dist > 0) // already obtained a good codebook
114  break;
115  }
116 
117  return best_codebook;
118 }
int32_t m_maxiter
max number of iterations
static int32_t hamming_distance(T1 *c1, T2 *c2, int32_t len)
Definition: ECOCUtil.h:31
#define SG_ERROR(...)
Definition: SGIO.h:129
static uint64_t random()
Definition: Math.h:1019
CECOCRandomDenseEncoder(int32_t maxiter=10000, int32_t codelen=0, float64_t pposone=0.5)
int32_t get_default_code_length(int32_t num_classes) const
double float64_t
Definition: common.h:50
T * get_column_vector(index_t col) const
Definition: SGMatrix.h:113
float64_t m_pposone
probability of +1
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual SGMatrix< int32_t > create_codebook(int32_t num_classes)
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68
#define SG_ADD(...)
Definition: SGObject.h:84

SHOGUN Machine Learning Toolbox - Documentation