SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Histogram.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/lib/common.h>
15 #include <shogun/io/SGIO.h>
17 
18 using namespace shogun;
19 
21 : CDistribution()
22 {
23  hist=SG_CALLOC(float64_t, 1<<16);
24 }
25 
27 : CDistribution()
28 {
29  hist=SG_CALLOC(float64_t, 1<<16);
30  features=f;
32 }
33 
35 {
36  SG_FREE(hist);
37 }
38 
40 {
41  int32_t vec;
42  int32_t feat;
43  int32_t i;
44 
45  if (data)
46  {
47  if (data->get_feature_class() != C_STRING ||
48  data->get_feature_type() != F_WORD)
49  {
50  SG_ERROR("Expected features of class string type word\n")
51  }
52  set_features(data);
53  }
54 
58 
59  for (i=0; i< (int32_t) (1<<16); i++)
60  hist[i]=0;
61 
62  for (vec=0; vec<features->get_num_vectors(); vec++)
63  {
64  int32_t len;
65  bool free_vec;
66 
67  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
68  get_feature_vector(vec, len, free_vec);
69 
70  for (feat=0; feat<len ; feat++)
71  hist[vector[feat]]++;
72 
74  free_feature_vector(vector, vec, free_vec);
75  }
76 
77  for (i=0; i< (int32_t) (1<<16); i++)
78  hist[i]=log(hist[i]);
79 
80  return true;
81 }
82 
84 {
88 
89  int32_t len;
90  bool free_vec;
91  float64_t loglik=0;
92 
93  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
94  get_feature_vector(num_example, len, free_vec);
95 
96  for (int32_t i=0; i<len; i++)
97  loglik+=hist[vector[i]];
98 
100  free_feature_vector(vector, num_example, free_vec);
101 
102  return loglik;
103 }
104 
105 float64_t CHistogram::get_log_derivative(int32_t num_param, int32_t num_example)
106 {
107  if (hist[num_param] < CMath::ALMOST_NEG_INFTY)
108  return -CMath::INFTY;
109  else
110  {
114 
115  int32_t len;
116  bool free_vec;
117  float64_t deriv=0;
118 
119  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
120  get_feature_vector(num_example, len, free_vec);
121 
122  int32_t num_occurences=0;
123 
124  for (int32_t i=0; i<len; i++)
125  {
126  deriv+=hist[vector[i]];
127 
128  if (vector[i]==num_param)
129  num_occurences++;
130  }
131 
133  free_feature_vector(vector, num_example, free_vec);
134 
135  if (num_occurences>0)
136  deriv+=CMath::log((float64_t) num_occurences)-hist[num_param];
137  else
138  deriv=-CMath::INFTY;
139 
140  return deriv;
141  }
142 }
143 
145 {
146  return hist[num_param];
147 }
148 
150 {
151  ASSERT(histogram.vlen==get_num_model_parameters())
152 
153  SG_FREE(hist);
154  hist=SG_MALLOC(float64_t, histogram.vlen);
155  for (int32_t i=0; i<histogram.vlen; i++)
156  hist[i]=histogram.vector[i];
157 
158  return true;
159 }
160 
162 {
164 }
165 

SHOGUN Machine Learning Toolbox - Documentation