SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Histogram.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/lib/common.h>
15 #include <shogun/io/SGIO.h>
17 
18 using namespace shogun;
19 
21 : CDistribution()
22 {
23  hist=SG_CALLOC(float64_t, 1<<16);
24 }
25 
27 : CDistribution()
28 {
29  hist=SG_CALLOC(float64_t, 1<<16);
30  features=f;
32 }
33 
35 {
36  SG_FREE(hist);
37 }
38 
40 {
41  int32_t vec;
42  int32_t feat;
43  int32_t i;
44 
45  if (data)
46  {
47  if (data->get_feature_class() != C_STRING ||
48  data->get_feature_type() != F_WORD)
49  {
50  SG_ERROR("Expected features of class string type word\n")
51  }
52  set_features(data);
53  }
54 
58 
59  for (i=0; i< (int32_t) (1<<16); i++)
60  hist[i]=0;
61 
62  for (vec=0; vec<features->get_num_vectors(); vec++)
63  {
64  int32_t len;
65  bool free_vec;
66 
67  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
68  get_feature_vector(vec, len, free_vec);
69 
70  for (feat=0; feat<len ; feat++)
71  hist[vector[feat]]++;
72 
74  free_feature_vector(vector, vec, free_vec);
75  }
76 
77  for (i=0; i< (int32_t) (1<<16); i++)
78  hist[i]=log(hist[i]);
79 
80  return true;
81 }
82 
84 {
88 
89  int32_t len;
90  bool free_vec;
91  float64_t loglik=0;
92 
93  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
94  get_feature_vector(num_example, len, free_vec);
95 
96  for (int32_t i=0; i<len; i++)
97  loglik+=hist[vector[i]];
98 
100  free_feature_vector(vector, num_example, free_vec);
101 
102  return loglik;
103 }
104 
105 float64_t CHistogram::get_log_derivative(int32_t num_param, int32_t num_example)
106 {
107  if (hist[num_param] < CMath::ALMOST_NEG_INFTY)
108  return -CMath::INFTY;
109  else
110  {
114 
115  int32_t len;
116  bool free_vec;
117  float64_t deriv=0;
118 
119  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
120  get_feature_vector(num_example, len, free_vec);
121 
122  int32_t num_occurences=0;
123 
124  for (int32_t i=0; i<len; i++)
125  {
126  deriv+=hist[vector[i]];
127 
128  if (vector[i]==num_param)
129  num_occurences++;
130  }
131 
133  free_feature_vector(vector, num_example, free_vec);
134 
135  if (num_occurences>0)
136  deriv+=CMath::log((float64_t) num_occurences)-hist[num_param];
137  else
138  deriv=-CMath::INFTY;
139 
140  return deriv;
141  }
142 }
143 
145 {
146  return hist[num_param];
147 }
148 
150 {
151  ASSERT(histogram.vlen==get_num_model_parameters())
152 
153  SG_FREE(hist);
154  hist=SG_MALLOC(float64_t, histogram.vlen);
155  for (int32_t i=0; i<histogram.vlen; i++)
156  hist[i]=histogram.vector[i];
157 
158  return true;
159 }
160 
162 {
164 }
165 
virtual void set_features(CFeatures *f)
Definition: Distribution.h:160
static const float64_t INFTY
infinity
Definition: Math.h:2048
virtual bool set_histogram(const SGVector< float64_t > histogram)
Definition: Histogram.cpp:149
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:129
Base class Distribution from which all methods implementing a distribution are derived.
Definition: Distribution.h:44
#define SG_REF(x)
Definition: SGObject.h:51
virtual SGVector< float64_t > get_histogram()
Definition: Histogram.cpp:161
static const float64_t ALMOST_NEG_INFTY
almost neg (log) infinity
Definition: Math.h:2052
index_t vlen
Definition: SGVector.h:492
#define ASSERT(x)
Definition: SGIO.h:201
double float64_t
Definition: common.h:50
virtual EFeatureClass get_feature_class() const =0
virtual float64_t get_log_model_parameter(int32_t num_param)
Definition: Histogram.cpp:144
virtual ~CHistogram()
Definition: Histogram.cpp:34
float64_t * hist
Definition: Histogram.h:98
virtual bool train(CFeatures *data=NULL)
Definition: Histogram.cpp:39
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class Features is the base class of all feature objects.
Definition: Features.h:68
static float64_t log(float64_t v)
Definition: Math.h:922
virtual float64_t get_log_likelihood_example(int32_t num_example)
Definition: Histogram.cpp:83
virtual int32_t get_num_model_parameters()
Definition: Histogram.h:55
virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example)
Definition: Histogram.cpp:105
virtual EFeatureType get_feature_type() const =0

SHOGUN Machine Learning Toolbox - Documentation