SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HMM.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef __CHMM_H__
13 #define __CHMM_H__
14 
16 #include <shogun/lib/common.h>
17 #include <shogun/io/SGIO.h>
18 #include <shogun/lib/config.h>
22 
23 #include <stdio.h>
24 
25 #ifdef USE_HMMPARALLEL
26 #define USE_HMMPARALLEL_STRUCTURES 1
27 #endif
28 
29 namespace shogun
30 {
31  class CFeatures;
32  template <class ST> class CStringFeatures;
35 
38 
39 #ifndef DOXYGEN_SHOULD_SKIP_THIS
40 
41 struct T_ALPHA_BETA
42 {
44  int32_t dimension;
45 
47  T_ALPHA_BETA_TABLE* table;
48 
50  bool updated;
51 
53  float64_t sum;
54 };
55 #endif // DOXYGEN_SHOULD_SKIP_THIS
56 
61 #ifdef USE_BIGSTATES
62 typedef uint16_t T_STATES ;
63 #else
64 typedef uint8_t T_STATES ;
65 #endif
66 typedef T_STATES* P_STATES ;
67 
69 
72 {
83 };
84 
85 
87 class Model
88 {
89  public:
91  Model();
92 
94  virtual ~Model();
95 
97  inline void sort_learn_a()
98  {
99  CMath::sort(learn_a,2) ;
100  }
101 
103  inline void sort_learn_b()
104  {
105  CMath::sort(learn_b,2) ;
106  }
107 
112 
113  inline int32_t get_learn_a(int32_t line, int32_t column) const
114  {
115  return learn_a[line*2 + column];
116  }
117 
119  inline int32_t get_learn_b(int32_t line, int32_t column) const
120  {
121  return learn_b[line*2 + column];
122  }
123 
125  inline int32_t get_learn_p(int32_t offset) const
126  {
127  return learn_p[offset];
128  }
129 
131  inline int32_t get_learn_q(int32_t offset) const
132  {
133  return learn_q[offset];
134  }
135 
137  inline int32_t get_const_a(int32_t line, int32_t column) const
138  {
139  return const_a[line*2 + column];
140  }
141 
143  inline int32_t get_const_b(int32_t line, int32_t column) const
144  {
145  return const_b[line*2 + column];
146  }
147 
149  inline int32_t get_const_p(int32_t offset) const
150  {
151  return const_p[offset];
152  }
153 
155  inline int32_t get_const_q(int32_t offset) const
156  {
157  return const_q[offset];
158  }
159 
161  inline float64_t get_const_a_val(int32_t line) const
162  {
163  return const_a_val[line];
164  }
165 
167  inline float64_t get_const_b_val(int32_t line) const
168  {
169  return const_b_val[line];
170  }
171 
173  inline float64_t get_const_p_val(int32_t offset) const
174  {
175  return const_p_val[offset];
176  }
177 
179  inline float64_t get_const_q_val(int32_t offset) const
180  {
181  return const_q_val[offset];
182  }
183 #ifdef FIX_POS
184 
185  inline char get_fix_pos_state(int32_t pos, T_STATES state, T_STATES num_states)
186  {
187 #ifdef HMM_DEBUG
188  if ((pos<0)||(pos*num_states+state>65336))
189  SG_DEBUG("index out of range in get_fix_pos_state(%i,%i,%i) \n", pos,state,num_states)
190 #endif
191  return fix_pos_state[pos*num_states+state] ;
192  }
193 #endif
194 
195 
200 
201  inline void set_learn_a(int32_t offset, int32_t value)
202  {
203  learn_a[offset]=value;
204  }
205 
207  inline void set_learn_b(int32_t offset, int32_t value)
208  {
209  learn_b[offset]=value;
210  }
211 
213  inline void set_learn_p(int32_t offset, int32_t value)
214  {
215  learn_p[offset]=value;
216  }
217 
219  inline void set_learn_q(int32_t offset, int32_t value)
220  {
221  learn_q[offset]=value;
222  }
223 
225  inline void set_const_a(int32_t offset, int32_t value)
226  {
227  const_a[offset]=value;
228  }
229 
231  inline void set_const_b(int32_t offset, int32_t value)
232  {
233  const_b[offset]=value;
234  }
235 
237  inline void set_const_p(int32_t offset, int32_t value)
238  {
239  const_p[offset]=value;
240  }
241 
243  inline void set_const_q(int32_t offset, int32_t value)
244  {
245  const_q[offset]=value;
246  }
247 
249  inline void set_const_a_val(int32_t offset, float64_t value)
250  {
251  const_a_val[offset]=value;
252  }
253 
255  inline void set_const_b_val(int32_t offset, float64_t value)
256  {
257  const_b_val[offset]=value;
258  }
259 
261  inline void set_const_p_val(int32_t offset, float64_t value)
262  {
263  const_p_val[offset]=value;
264  }
265 
267  inline void set_const_q_val(int32_t offset, float64_t value)
268  {
269  const_q_val[offset]=value;
270  }
271 #ifdef FIX_POS
272 
273  inline void set_fix_pos_state(
274  int32_t pos, T_STATES state, T_STATES num_states, char value)
275  {
276 #ifdef HMM_DEBUG
277  if ((pos<0)||(pos*num_states+state>65336))
278  SG_DEBUG("index out of range in set_fix_pos_state(%i,%i,%i,%i) [%i]\n", pos,state,num_states,(int)value, pos*num_states+state)
279 #endif
280  fix_pos_state[pos*num_states+state]=value;
281  if (value==FIX_ALLOWED)
282  for (int32_t i=0; i<num_states; i++)
283  if (get_fix_pos_state(pos,i,num_states)==FIX_DEFAULT)
284  set_fix_pos_state(pos,i,num_states,FIX_DISALLOWED) ;
285  }
287 
289  const static char FIX_DISALLOWED ;
290 
292  const static char FIX_ALLOWED ;
293 
295  const static char FIX_DEFAULT ;
296 
298  const static float64_t DISALLOWED_PENALTY ;
299 #endif
300  protected:
307 
308  int32_t* learn_a;
309 
311  int32_t* learn_b;
312 
314  int32_t* learn_p;
315 
317  int32_t* learn_q;
319 
326 
327  int32_t* const_a;
328 
330  int32_t* const_b;
331 
333  int32_t* const_p;
334 
336  int32_t* const_q;
337 
338 
341 
344 
347 
350 
351 #ifdef FIX_POS
352 
355  char* fix_pos_state;
356 #endif
357 
358 };
359 
360 
371 class CHMM : public CDistribution
372 {
373  private:
374 
375  T_STATES trans_list_len ;
376  T_STATES **trans_list_forward ;
377  T_STATES *trans_list_forward_cnt ;
378  float64_t **trans_list_forward_val ;
379  T_STATES **trans_list_backward ;
380  T_STATES *trans_list_backward_cnt ;
381  bool mem_initialized ;
382 
383 #ifdef USE_HMMPARALLEL_STRUCTURES
384 
386  struct S_DIM_THREAD_PARAM
387  {
388  CHMM* hmm;
389  int32_t dim;
390  float64_t prob_sum;
391  };
392 
394  struct S_BW_THREAD_PARAM
395  {
396  CHMM* hmm;
397  int32_t dim_start;
398  int32_t dim_stop;
399 
400  float64_t ret;
401 
402  float64_t* p_buf;
403  float64_t* q_buf;
404  float64_t* a_buf;
405  float64_t* b_buf;
406  };
407 
408  inline T_ALPHA_BETA & ALPHA_CACHE(int32_t dim) {
409  return alpha_cache[dim%parallel->get_num_threads()] ; } ;
410  inline T_ALPHA_BETA & BETA_CACHE(int32_t dim) {
411  return beta_cache[dim%parallel->get_num_threads()] ; } ;
412 #ifdef USE_LOGSUMARRAY
413  inline float64_t* ARRAYS(int32_t dim) {
414  return arrayS[dim%parallel->get_num_threads()] ; } ;
415 #endif
416  inline float64_t* ARRAYN1(int32_t dim) {
417  return arrayN1[dim%parallel->get_num_threads()] ; } ;
418  inline float64_t* ARRAYN2(int32_t dim) {
419  return arrayN2[dim%parallel->get_num_threads()] ; } ;
420  inline T_STATES* STATES_PER_OBSERVATION_PSI(int32_t dim) {
422  inline const T_STATES* STATES_PER_OBSERVATION_PSI(int32_t dim) const {
424  inline T_STATES* PATH(int32_t dim) {
425  return path[dim%parallel->get_num_threads()] ; } ;
426  inline bool & PATH_PROB_UPDATED(int32_t dim) {
427  return path_prob_updated[dim%parallel->get_num_threads()] ; } ;
428  inline int32_t & PATH_PROB_DIMENSION(int32_t dim) {
429  return path_prob_dimension[dim%parallel->get_num_threads()] ; } ;
430 #else
431  inline T_ALPHA_BETA & ALPHA_CACHE(int32_t /*dim*/) {
432  return alpha_cache ; } ;
433  inline T_ALPHA_BETA & BETA_CACHE(int32_t /*dim*/) {
434  return beta_cache ; } ;
435 #ifdef USE_LOGSUMARRAY
436  inline float64_t* ARRAYS(int32_t dim) {
437  return arrayS ; } ;
438 #endif
439  inline float64_t* ARRAYN1(int32_t /*dim*/) {
440  return arrayN1 ; } ;
441  inline float64_t* ARRAYN2(int32_t /*dim*/) {
442  return arrayN2 ; } ;
443  inline T_STATES* STATES_PER_OBSERVATION_PSI(int32_t /*dim*/) {
444  return states_per_observation_psi ; } ;
445  inline const T_STATES* STATES_PER_OBSERVATION_PSI(int32_t /*dim*/) const {
446  return states_per_observation_psi ; } ;
447  inline T_STATES* PATH(int32_t /*dim*/) {
448  return path ; } ;
449  inline bool & PATH_PROB_UPDATED(int32_t /*dim*/) {
450  return path_prob_updated ; } ;
451  inline int32_t & PATH_PROB_DIMENSION(int32_t /*dim*/) {
452  return path_prob_dimension ; } ;
453 #endif
454 
459  bool converged(float64_t x, float64_t y);
460 
466  public:
468  CHMM();
469 
480  CHMM(
481  int32_t N, int32_t M, Model* model, float64_t PSEUDO);
482  CHMM(
483  CStringFeatures<uint16_t>* obs, int32_t N, int32_t M,
484  float64_t PSEUDO);
485  CHMM(
486  int32_t N, float64_t* p, float64_t* q, float64_t* a);
487  CHMM(
488  int32_t N, float64_t* p, float64_t* q, int32_t num_trans,
489  float64_t* a_trans);
490 
495  CHMM(FILE* model_file, float64_t PSEUDO);
496 
498  CHMM(CHMM* h);
499 
501  virtual ~CHMM();
502 
511  virtual bool train(CFeatures* data=NULL);
512  virtual int32_t get_num_model_parameters() { return N*(N+M+2); }
513  virtual float64_t get_log_model_parameter(int32_t num_param);
514  virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example);
515  virtual float64_t get_log_likelihood_example(int32_t num_example)
516  {
517  return model_probability(num_example);
518  }
519 
525  bool initialize(Model* model, float64_t PSEUDO, FILE* model_file=NULL);
527 
530 
533 
545  float64_t forward_comp(int32_t time, int32_t state, int32_t dimension);
547  int32_t time, int32_t state, int32_t dimension);
548 
556  float64_t backward_comp(int32_t time, int32_t state, int32_t dimension);
558  int32_t time, int32_t state, int32_t dimension);
559 
564  float64_t best_path(int32_t dimension);
565  inline uint16_t get_best_path_state(int32_t dim, int32_t t)
566  {
567  ASSERT(PATH(dim))
568  return PATH(dim)[t];
569  }
570 
574 
576  inline float64_t model_probability(int32_t dimension=-1)
577  {
578  //for faster calculation cache model probability
579  if (dimension==-1)
580  {
581  if (mod_prob_updated)
583  else
585  }
586  else
587  return forward(p_observations->get_vector_length(dimension), 0, dimension);
588  }
589 
595  inline float64_t linear_model_probability(int32_t dimension)
596  {
597  float64_t lik=0;
598  int32_t len=0;
599  bool free_vec;
600  uint16_t* o=p_observations->get_feature_vector(dimension, len, free_vec);
602 
603  ASSERT(N==len)
604 
605  for (int32_t i=0; i<N; i++)
606  {
607  lik+=obs_b[*o++];
608  obs_b+=M;
609  }
610  p_observations->free_feature_vector(o, dimension, free_vec);
611  return lik;
612 
613  // sorry, the above code is the speed optimized version of :
614  /* float64_t lik=0;
615 
616  for (int32_t i=0; i<N; i++)
617  lik+=get_b(i, p_observations->get_feature(dimension, i));
618  return lik;
619  */
620  // : that
621  }
622 
624 
627  inline bool set_iterations(int32_t num) { iterations=num; return true; }
628  inline int32_t get_iterations() { return iterations; }
629  inline bool set_epsilon (float64_t eps) { epsilon=eps; return true; }
630  inline float64_t get_epsilon() { return epsilon; }
631 
636 
643  void estimate_model_baum_welch(CHMM* train);
645 
646 #ifdef USE_HMMPARALLEL_STRUCTURES
647  void ab_buf_comp(
648  float64_t* p_buf, float64_t* q_buf, float64_t* a_buf,
649  float64_t* b_buf, int32_t dim) ;
650 #else
651  void estimate_model_baum_welch_old(CHMM* train);
652 #endif
653 
658 
662  void estimate_model_viterbi(CHMM* train);
663 
668 
670 
672  bool linear_train(bool right_align=false);
673 
675  bool permutation_entropy(int32_t window_width, int32_t sequence_number);
676 
683  void output_model(bool verbose=false);
684 
686  void output_model_defined(bool verbose=false);
688 
689 
692 
694  void normalize(bool keep_dead_states=false);
695 
699  void add_states(int32_t num_states, float64_t default_val=0);
700 
706  bool append_model(
707  CHMM* append_model, float64_t* cur_out, float64_t* app_out);
708 
713 
715  void chop(float64_t value);
716 
718  void convert_to_log();
719 
721  void init_model_random();
722 
728  void init_model_defined();
729 
731  void clear_model();
732 
734  void clear_model_defined();
735 
737  void copy_model(CHMM* l);
738 
743  void invalidate_model();
744 
748  inline bool get_status() const
749  {
750  return status;
751  }
752 
754  inline float64_t get_pseudo() const
755  {
756  return PSEUDO ;
757  }
758 
760  inline void set_pseudo(float64_t pseudo)
761  {
762  PSEUDO=pseudo ;
763  }
764 
765 #ifdef USE_HMMPARALLEL_STRUCTURES
766  static void* bw_dim_prefetch(void * params);
767  static void* bw_single_dim_prefetch(void * params);
768  static void* vit_dim_prefetch(void * params);
769 #endif
770 
771 #ifdef FIX_POS
772 
775  inline bool set_fix_pos_state(int32_t pos, T_STATES state, char value)
776  {
777  if (!model)
778  return false ;
779  model->set_fix_pos_state(pos, state, N, value) ;
780  return true ;
781  } ;
782 #endif
783 
784 
793  void set_observations(CStringFeatures<uint16_t>* obs, CHMM* hmm=NULL);
794 
798  void set_observation_nocache(CStringFeatures<uint16_t>* obs);
799 
802  {
804  return p_observations;
805  }
807 
875  bool load_definitions(FILE* file, bool verbose, bool initialize=true);
876 
912  bool load_model(FILE* file);
913 
917  bool save_model(FILE* file);
918 
922  bool save_model_derivatives(FILE* file);
923 
927  bool save_model_derivatives_bin(FILE* file);
928 
932  bool save_model_bin(FILE* file);
933 
935  bool check_model_derivatives() ;
937 
943  T_STATES* get_path(int32_t dim, float64_t& prob);
944 
948  bool save_path(FILE* file);
949 
953  bool save_path_derivatives(FILE* file);
954 
958  bool save_path_derivatives_bin(FILE* file);
959 
960 #ifdef USE_HMMDEBUG
961 
962  bool check_path_derivatives() ;
963 #endif //USE_HMMDEBUG
964 
968  bool save_likelihood_bin(FILE* file);
969 
973  bool save_likelihood(FILE* file);
975 
981 
983  inline T_STATES get_N() const { return N ; }
984 
986  inline int32_t get_M() const { return M ; }
987 
992  inline void set_q(T_STATES offset, float64_t value)
993  {
994 #ifdef HMM_DEBUG
995  if (offset>=N)
996  SG_DEBUG("index out of range in set_q(%i,%e) [%i]\n", offset,value,N)
997 #endif
998  end_state_distribution_q[offset]=value;
999  }
1000 
1005  inline void set_p(T_STATES offset, float64_t value)
1006  {
1007 #ifdef HMM_DEBUG
1008  if (offset>=N)
1009  SG_DEBUG("index out of range in set_p(%i,.) [%i]\n", offset,N)
1010 #endif
1011  initial_state_distribution_p[offset]=value;
1012  }
1013 
1019  inline void set_A(T_STATES line_, T_STATES column, float64_t value)
1020  {
1021 #ifdef HMM_DEBUG
1022  if ((line_>N)||(column>N))
1023  SG_DEBUG("index out of range in set_A(%i,%i,.) [%i,%i]\n",line_,column,N,N)
1024 #endif
1025  transition_matrix_A[line_+column*N]=value;
1026  }
1027 
1033  inline void set_a(T_STATES line_, T_STATES column, float64_t value)
1034  {
1035 #ifdef HMM_DEBUG
1036  if ((line_>N)||(column>N))
1037  SG_DEBUG("index out of range in set_a(%i,%i,.) [%i,%i]\n",line_,column,N,N)
1038 #endif
1039  transition_matrix_a[line_+column*N]=value; // look also best_path!
1040  }
1041 
1047  inline void set_B(T_STATES line_, uint16_t column, float64_t value)
1048  {
1049 #ifdef HMM_DEBUG
1050  if ((line_>=N)||(column>=M))
1051  SG_DEBUG("index out of range in set_B(%i,%i) [%i,%i]\n", line_, column,N,M)
1052 #endif
1053  observation_matrix_B[line_*M+column]=value;
1054  }
1055 
1061  inline void set_b(T_STATES line_, uint16_t column, float64_t value)
1062  {
1063 #ifdef HMM_DEBUG
1064  if ((line_>=N)||(column>=M))
1065  SG_DEBUG("index out of range in set_b(%i,%i) [%i,%i]\n", line_, column,N,M)
1066 #endif
1067  observation_matrix_b[line_*M+column]=value;
1068  }
1069 
1076  inline void set_psi(
1077  int32_t time, T_STATES state, T_STATES value, int32_t dimension)
1078  {
1079 #ifdef HMM_DEBUG
1080  if ((time>=p_observations->get_max_vector_length())||(state>N))
1081  SG_DEBUG("index out of range in set_psi(%i,%i,.) [%i,%i]\n",time,state,p_observations->get_max_vector_length(),N)
1082 #endif
1083  STATES_PER_OBSERVATION_PSI(dimension)[time*N+state]=value;
1084  }
1085 
1090  inline float64_t get_q(T_STATES offset) const
1091  {
1092 #ifdef HMM_DEBUG
1093  if (offset>=N)
1094  SG_DEBUG("index out of range in %e=get_q(%i) [%i]\n", end_state_distribution_q[offset],offset,N)
1095 #endif
1096  return end_state_distribution_q[offset];
1097  }
1098 
1103  inline float64_t get_p(T_STATES offset) const
1104  {
1105 #ifdef HMM_DEBUG
1106  if (offset>=N)
1107  SG_DEBUG("index out of range in get_p(%i,.) [%i]\n", offset,N)
1108 #endif
1109  return initial_state_distribution_p[offset];
1110  }
1111 
1117  inline float64_t get_A(T_STATES line_, T_STATES column) const
1118  {
1119 #ifdef HMM_DEBUG
1120  if ((line_>N)||(column>N))
1121  SG_DEBUG("index out of range in get_A(%i,%i) [%i,%i]\n",line_,column,N,N)
1122 #endif
1123  return transition_matrix_A[line_+column*N];
1124  }
1125 
1131  inline float64_t get_a(T_STATES line_, T_STATES column) const
1132  {
1133 #ifdef HMM_DEBUG
1134  if ((line_>N)||(column>N))
1135  SG_DEBUG("index out of range in get_a(%i,%i) [%i,%i]\n",line_,column,N,N)
1136 #endif
1137  return transition_matrix_a[line_+column*N]; // look also best_path()!
1138  }
1139 
1145  inline float64_t get_B(T_STATES line_, uint16_t column) const
1146  {
1147 #ifdef HMM_DEBUG
1148  if ((line_>=N)||(column>=M))
1149  SG_DEBUG("index out of range in get_B(%i,%i) [%i,%i]\n", line_, column,N,M)
1150 #endif
1151  return observation_matrix_B[line_*M+column];
1152  }
1153 
1159  inline float64_t get_b(T_STATES line_, uint16_t column) const
1160  {
1161 #ifdef HMM_DEBUG
1162  if ((line_>=N)||(column>=M))
1163  SG_DEBUG("index out of range in get_b(%i,%i) [%i,%i]\n", line_, column,N,M)
1164 #endif
1165  //SG_PRINT("idx %d\n", line_*M+column)
1166  return observation_matrix_b[line_*M+column];
1167  }
1168 
1176  int32_t time, T_STATES state, int32_t dimension) const
1177  {
1178 #ifdef HMM_DEBUG
1179  if ((time>=p_observations->get_max_vector_length())||(state>N))
1180  SG_DEBUG("index out of range in get_psi(%i,%i) [%i,%i]\n",time,state,p_observations->get_max_vector_length(),N)
1181 #endif
1182  return STATES_PER_OBSERVATION_PSI(dimension)[time*N+state];
1183  }
1184 
1186 
1188  virtual const char* get_name() const { return "HMM"; }
1189 
1190  protected:
1195 
1196  int32_t M;
1197 
1199  int32_t N;
1200 
1203 
1204  // line number during processing input files
1205  int32_t line;
1206 
1209 
1210  //train definition for HMM
1212 
1215 
1218 
1221 
1224 
1227 
1230 
1232  int32_t iterations;
1234 
1237  int32_t conv_it;
1238 
1241 
1244 
1247 
1250 
1253 
1256 
1259 
1260  // true if model is using log likelihood
1262 
1263  // true->ok, false->error
1264  bool status;
1265 
1266  // true->stolen from other HMMs, false->got own
1269 
1270 #ifdef USE_HMMPARALLEL_STRUCTURES
1271 
1272  float64_t** arrayN1 /*[parallel.get_num_threads()]*/ ;
1274  float64_t** arrayN2 /*[parallel.get_num_threads()]*/ ;
1275 #else //USE_HMMPARALLEL_STRUCTURES
1276 
1280 #endif //USE_HMMPARALLEL_STRUCTURES
1281 
1282 #ifdef USE_LOGSUMARRAY
1283 #ifdef USE_HMMPARALLEL_STRUCTURES
1284 
1285  float64_t** arrayS /*[parallel.get_num_threads()]*/;
1286 #else
1287 
1288  float64_t* arrayS;
1289 #endif // USE_HMMPARALLEL_STRUCTURES
1290 #endif // USE_LOGSUMARRAY
1291 
1292 #ifdef USE_HMMPARALLEL_STRUCTURES
1293 
1295  T_ALPHA_BETA* alpha_cache /*[parallel.get_num_threads()]*/ ;
1297  T_ALPHA_BETA* beta_cache /*[parallel.get_num_threads()]*/ ;
1298 
1300  T_STATES** states_per_observation_psi /*[parallel.get_num_threads()]*/ ;
1301 
1303  T_STATES** path /*[parallel.get_num_threads()]*/ ;
1304 
1306  bool* path_prob_updated /*[parallel.get_num_threads()]*/;
1307 
1309  int32_t* path_prob_dimension /*[parallel.get_num_threads()]*/ ;
1310 
1311 #else //USE_HMMPARALLEL_STRUCTURES
1312 
1313  T_ALPHA_BETA alpha_cache;
1315  T_ALPHA_BETA beta_cache;
1316 
1319 
1322 
1325 
1328 
1329 #endif //USE_HMMPARALLEL_STRUCTURES
1330 
1331 
1333  static const int32_t GOTN;
1335  static const int32_t GOTM;
1337  static const int32_t GOTO;
1339  static const int32_t GOTa;
1341  static const int32_t GOTb;
1343  static const int32_t GOTp;
1345  static const int32_t GOTq;
1346 
1348  static const int32_t GOTlearn_a;
1350  static const int32_t GOTlearn_b;
1352  static const int32_t GOTlearn_p;
1354  static const int32_t GOTlearn_q;
1356  static const int32_t GOTconst_a;
1358  static const int32_t GOTconst_b;
1360  static const int32_t GOTconst_p;
1362  static const int32_t GOTconst_q;
1363 
1364  public:
1369 
1372  int32_t time, int32_t state, int32_t dimension)
1373 {
1374  return forward(time, state, dimension) + backward(time, state, dimension) - model_probability(dimension);
1375 }
1376 
1379  int32_t time, int32_t state_i, int32_t state_j, int32_t dimension)
1380 {
1381  return forward(time, state_i, dimension) +
1382  backward(time+1, state_j, dimension) +
1383  get_a(state_i,state_j) + get_b(state_j,p_observations->get_feature(dimension ,time+1)) - model_probability(dimension);
1384 }
1385 
1392 
1396  T_STATES i, uint16_t j, int32_t dimension)
1397 {
1398  float64_t der=0;
1399 
1400  for (int32_t k=0; k<N; k++)
1401  {
1402  if (k!=i || p_observations->get_feature(dimension, k) != j)
1403  der+=get_b(k, p_observations->get_feature(dimension, k));
1404  }
1405 
1406  return der;
1407 }
1408 
1412 inline float64_t model_derivative_p(T_STATES i, int32_t dimension)
1413 {
1414  return backward(0,i,dimension)+get_b(i, p_observations->get_feature(dimension, 0));
1415 }
1416 
1420 inline float64_t model_derivative_q(T_STATES i, int32_t dimension)
1421 {
1422  return forward(p_observations->get_vector_length(dimension)-1,i,dimension) ;
1423 }
1424 
1426 inline float64_t model_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
1427 {
1429  for (int32_t t=0; t<p_observations->get_vector_length(dimension)-1; t++)
1430  sum= CMath::logarithmic_sum(sum, forward(t, i, dimension) + backward(t+1, j, dimension) + get_b(j, p_observations->get_feature(dimension,t+1)));
1431 
1432  return sum;
1433 }
1434 
1435 
1437 inline float64_t model_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
1438 {
1440  for (int32_t t=0; t<p_observations->get_vector_length(dimension); t++)
1441  {
1442  if (p_observations->get_feature(dimension,t)==j)
1443  sum= CMath::logarithmic_sum(sum, forward(t,i,dimension)+backward(t,i,dimension)-get_b(i,p_observations->get_feature(dimension,t)));
1444  }
1445  //if (sum==-CMath::INFTY)
1446  // SG_DEBUG("log derivative is -inf: dim=%i, state=%i, obs=%i\n",dimension, i, j)
1447  return sum;
1448 }
1450 
1457 
1459 inline float64_t path_derivative_p(T_STATES i, int32_t dimension)
1460 {
1461  best_path(dimension);
1462  return (i==PATH(dimension)[0]) ? (exp(-get_p(PATH(dimension)[0]))) : (0) ;
1463 }
1464 
1466 inline float64_t path_derivative_q(T_STATES i, int32_t dimension)
1467 {
1468  best_path(dimension);
1469  return (i==PATH(dimension)[p_observations->get_vector_length(dimension)-1]) ? (exp(-get_q(PATH(dimension)[p_observations->get_vector_length(dimension)-1]))) : 0 ;
1470 }
1471 
1473 inline float64_t path_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
1474 {
1475  prepare_path_derivative(dimension) ;
1476  return (get_A(i,j)==0) ? (0) : (get_A(i,j)*exp(-get_a(i,j))) ;
1477 }
1478 
1480 inline float64_t path_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
1481 {
1482  prepare_path_derivative(dimension) ;
1483  return (get_B(i,j)==0) ? (0) : (get_B(i,j)*exp(-get_b(i,j))) ;
1484 }
1485 
1487 
1488 
1489 protected:
1494 
1495  bool get_numbuffer(FILE* file, char* buffer, int32_t length);
1496 
1498  void open_bracket(FILE* file);
1499 
1501  void close_bracket(FILE* file);
1502 
1504  bool comma_or_space(FILE* file);
1505 
1507  inline void error(int32_t p_line, const char* str)
1508  {
1509  if (p_line)
1510  SG_ERROR("error in line %d %s\n", p_line, str)
1511  else
1512  SG_ERROR("error %s\n", str)
1513  }
1515 
1517  inline void prepare_path_derivative(int32_t dim)
1518  {
1520  return ;
1521  int32_t i,j,t ;
1522  best_path(dim);
1523  //initialize with zeros
1524  for (i=0; i<N; i++)
1525  {
1526  for (j=0; j<N; j++)
1527  set_A(i,j, 0);
1528  for (j=0; j<M; j++)
1529  set_B(i,j, 0);
1530  }
1531 
1532  //counting occurences for A and B
1533  for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
1534  {
1535  set_A(PATH(dim)[t], PATH(dim)[t+1], get_A(PATH(dim)[t], PATH(dim)[t+1])+1);
1536  set_B(PATH(dim)[t], p_observations->get_feature(dim,t), get_B(PATH(dim)[t], p_observations->get_feature(dim,t))+1);
1537  }
1539  path_deriv_dimension=dim ;
1540  path_deriv_updated=true ;
1541  } ;
1543 
1545  inline float64_t forward(int32_t time, int32_t state, int32_t dimension)
1546  {
1547  if (time<1)
1548  time=0;
1549 
1550  if (ALPHA_CACHE(dimension).table && (dimension==ALPHA_CACHE(dimension).dimension) && ALPHA_CACHE(dimension).updated)
1551  {
1552  if (time<p_observations->get_vector_length(dimension))
1553  return ALPHA_CACHE(dimension).table[time*N+state];
1554  else
1555  return ALPHA_CACHE(dimension).sum;
1556  }
1557  else
1558  return forward_comp(time, state, dimension) ;
1559  }
1560 
1562  inline float64_t backward(int32_t time, int32_t state, int32_t dimension)
1563  {
1564  if (BETA_CACHE(dimension).table && (dimension==BETA_CACHE(dimension).dimension) && (BETA_CACHE(dimension).updated))
1565  {
1566  if (time<0)
1567  return BETA_CACHE(dimension).sum;
1568  if (time<p_observations->get_vector_length(dimension))
1569  return BETA_CACHE(dimension).table[time*N+state];
1570  else
1571  return -CMath::INFTY;
1572  }
1573  else
1574  return backward_comp(time, state, dimension) ;
1575  }
1576 
1577 };
1578 }
1579 #endif

SHOGUN Machine Learning Toolbox - Documentation