SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
StreamingStringFeatures.cpp
Go to the documentation of this file.
2 
3 namespace shogun
4 {
5 
6 
7 template <class T>
9 {
10  init();
12  remap_to_bin=false;
13 }
14 
15 template <class T>
17  bool is_labelled,
18  int32_t size)
20 {
21  init(file, is_labelled, size);
23  remap_to_bin=false;
24 }
25 
26 template <class T>
28 {
29  if (parser.is_running())
30  parser.end_parser();
31  SG_UNREF(alphabet);
32 }
33 
34 template <class T>
36 {
37  SG_UNREF(alphabet);
38 
39  alphabet=new CAlphabet(alpha);
40  SG_REF(alphabet);
41  num_symbols=alphabet->get_num_symbols();
42 }
43 
44 template <class T>
46 {
47  SG_UNREF(alphabet);
48 
49  alphabet=new CAlphabet(alpha);
50  SG_REF(alphabet);
51  num_symbols=alphabet->get_num_symbols();
52 }
53 
54 template <class T>
55 void CStreamingStringFeatures<T>::set_remap(CAlphabet* ascii_alphabet, CAlphabet* binary_alphabet)
56 {
57  remap_to_bin=true;
58  alpha_ascii=new CAlphabet(ascii_alphabet);
59  alpha_bin=new CAlphabet(binary_alphabet);
60 }
61 
62 template <class T>
63 void CStreamingStringFeatures<T>::set_remap(EAlphabet ascii_alphabet, EAlphabet binary_alphabet)
64 {
65  remap_to_bin=true;
66  alpha_ascii=new CAlphabet(ascii_alphabet);
67  alpha_bin=new CAlphabet(binary_alphabet);
68 }
69 
70 template <class T>
72 {
73  SG_REF(alphabet);
74  return alphabet;
75 }
76 
77 template <class T>
79 {
80  return num_symbols;
81 }
82 
83 template <class T>
85 {
86  if (current_string)
87  return 1;
88  return 0;
89 }
90 
91 template <class T>
93 {
94  return current_length;
95 }
96 
98 {
99  parser.set_read_vector(&CStreamingFile::get_string);
100 }
101 
103 {
104  parser.set_read_vector_and_label
106 }
107 
/*
 * Expands to an explicit specialization of
 * CStreamingStringFeatures<sg_type>::get_feature_type() whose body simply
 * returns f_type. Invoked once per supported element type below and
 * #undef'ed afterwards.
 */
#define GET_FEATURE_TYPE(f_type, sg_type)                                    \
	template<>                                                               \
	EFeatureType CStreamingStringFeatures<sg_type>::get_feature_type() const \
	{                                                                        \
		return f_type;                                                       \
	}
113 
116 GET_FEATURE_TYPE(F_BYTE, uint8_t)
117 GET_FEATURE_TYPE(F_BYTE, int8_t)
118 GET_FEATURE_TYPE(F_SHORT, int16_t)
119 GET_FEATURE_TYPE(F_WORD, uint16_t)
120 GET_FEATURE_TYPE(F_INT, int32_t)
121 GET_FEATURE_TYPE(F_UINT, uint32_t)
122 GET_FEATURE_TYPE(F_LONG, int64_t)
123 GET_FEATURE_TYPE(F_ULONG, uint64_t)
127 #undef GET_FEATURE_TYPE
128 
129 
130 template <class T>
131 void CStreamingStringFeatures<T>::init()
132 {
133  working_file=NULL;
134  alphabet=new CAlphabet();
135 
136  current_string=NULL;
137  current_length=-1;
138  current_sgstring.string=current_string;
139  current_sgstring.slen=current_length;
140 
141  set_generic<T>();
142 }
143 
144 template <class T>
145 void CStreamingStringFeatures<T>::init(CStreamingFile* file,
146  bool is_labelled,
147  int32_t size)
148 {
149  init();
150  has_labels=is_labelled;
151  working_file=file;
152  parser.init(file, is_labelled, size);
153  parser.set_free_vector_after_release(false);
154  parser.set_free_vectors_on_destruct(false);
155 }
156 
157 template <class T>
159 {
160  if (!remap_to_bin)
161  alpha_ascii=alphabet;
162 
163  if (!parser.is_running())
164  parser.start_parser();
165 }
166 
167 template <class T>
169 {
170  parser.end_parser();
171 }
172 
173 template <class T>
175 {
176  bool ret_value;
177 
178  ret_value = (bool) parser.get_next_example(current_string,
179  current_length,
180  current_label);
181 
182  if (!ret_value)
183  return false;
184 
185  int32_t i;
186  if (remap_to_bin)
187  {
188  alpha_ascii->add_string_to_histogram(current_string, current_length);
189 
190  for (i=0; i<current_length; i++)
191  current_string[i]=alpha_ascii->remap_to_bin(current_string[i]);
192  alpha_bin->add_string_to_histogram(current_string, current_length);
193  }
194  else
195  {
196  alpha_ascii->add_string_to_histogram(current_string, current_length);
197  }
198 
199  /* Check the input using src alphabet, alpha_ascii */
200  if ( !(alpha_ascii->check_alphabet_size() && alpha_ascii->check_alphabet()) )
201  {
202  SG_ERROR("StreamingStringFeatures: The given input was found to be incompatible with the alphabet!\n")
203  return 0;
204  }
205 
206  //SG_UNREF(alphabet);
207 
208  if (remap_to_bin)
209  alphabet=alpha_bin;
210  else
211  alphabet=alpha_ascii;
212 
213  //SG_REF(alphabet);
214  num_symbols=alphabet->get_num_symbols();
215 
216  return ret_value;
217 }
218 
219 template <class T>
221 {
222  current_sgstring.string=current_string;
223  current_sgstring.slen=current_length;
224 
225  return current_sgstring;
226 }
227 
228 template <class T>
230 {
231  ASSERT(has_labels)
232 
233  return current_label;
234 }
235 
236 template <class T>
238 {
239  parser.finalize_example();
240 }
241 
242 template <class T>
244 {
245  return current_length;
246 }
247 
248 template <class T>
250 {
251  return C_STREAMING_STRING;
252 }
253 
254 template class CStreamingStringFeatures<bool>;
255 template class CStreamingStringFeatures<char>;
256 template class CStreamingStringFeatures<int8_t>;
257 template class CStreamingStringFeatures<uint8_t>;
258 template class CStreamingStringFeatures<int16_t>;
260 template class CStreamingStringFeatures<int32_t>;
262 template class CStreamingStringFeatures<int64_t>;
267 
268 }
virtual void get_string(bool *&vector, int32_t &len)
EAlphabet
Alphabet of charfeatures/observations.
Definition: Alphabet.h:23
#define SG_ERROR(...)
Definition: SGIO.h:128
The class Alphabet implements an alphabet and alphabet utility functions.
Definition: Alphabet.h:91
#define SG_REF(x)
Definition: SGObject.h:52
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
A Streaming File access class.
Definition: StreamingFile.h:34
virtual void get_string_and_label(bool *&vector, int32_t &len, float64_t &label)
virtual EFeatureClass get_feature_class() const
shogun string
#define ASSERT(x)
Definition: SGIO.h:200
bool remap_to_bin
Whether remapping must be done.
This class implements streaming features as strings.
double float64_t
Definition: common.h:60
long double floatmax_t
Definition: common.h:61
float float32_t
Definition: common.h:59
#define SG_UNREF(x)
Definition: SGObject.h:53
All classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define GET_FEATURE_TYPE(f_type, sg_type)
Streaming features are features which are used for online algorithms.
void set_remap(CAlphabet *ascii_alphabet, CAlphabet *binary_alphabet)

SHOGUN Machine Learning Toolbox - Documentation