SHOGUN  3.2.2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DynProg.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Gunnar Raetsch
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Written (W) 2008-2009 Jonas Behr
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
13 #ifndef __CDYNPROG_H__
14 #define __CDYNPROG_H__
15 
17 #include <shogun/lib/common.h>
18 #include <shogun/base/SGObject.h>
19 #include <shogun/io/SGIO.h>
20 #include <shogun/lib/config.h>
23 #include <shogun/structure/Plif.h>
31 #include <shogun/lib/Time.h>
32 
33 
34 namespace shogun
35 {
36  template <class T> class CSparseFeatures;
37  class CIntronList;
38  class CPlifMatrix;
39  class CSegmentLoss;
40 
41  template <class T> class CDynamicArray;
42 
43 //#define DYNPROG_TIMING
44 
/** Integer type used for state indices: 16 bits wide when USE_BIGSTATES is
 * defined at build time, 8 bits wide otherwise.
 */
#if defined(USE_BIGSTATES)
typedef uint16_t T_STATES;
#else
typedef uint8_t T_STATES;
#endif

/** Pointer to T_STATES. */
typedef T_STATES* P_STATES;
51 
52 #ifndef DOXYGEN_SHOULD_SKIP_THIS
53 
54 struct segment_loss_struct
55 {
57  int32_t maxlookback;
59  int32_t seqlen;
61  int32_t *segments_changed;
63  float64_t *num_segment_id;
65  int32_t *length_segment_id ;
66 };
67 #endif
68 
74 class CDynProg : public CSGObject
75 {
76 public:
81  CDynProg(int32_t p_num_svms=8);
82  virtual ~CDynProg();
83 
84  // model related functions
90  void set_num_states(int32_t N);
91 
93  int32_t get_num_states();
94 
96  int32_t get_num_svms();
97 
103  void init_content_svm_value_array(const int32_t p_num_svms);
104 
112  void init_tiling_data(int32_t* probe_pos, float64_t* intensities, const int32_t num_probes);
113 
120  void precompute_tiling_plifs(CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs);
121 
126  void resize_lin_feat(int32_t num_new_feat);
132 
138 
143  void set_a(SGMatrix<float64_t> a);
144 
149  void set_a_id(SGMatrix<int32_t> a);
150 
156 
161  void init_mod_words_array(SGMatrix<int32_t> p_mod_words_array);
162 
168  bool check_svm_arrays();
169 
175 
182  int32_t get_num_positions();
183 
194 
199  void set_pos(SGVector<int32_t> pos);
200 
206  void set_orf_info(SGMatrix<int32_t> orf_info);
207 
212  void set_gene_string(SGVector<char> genestr);
213 
214 
219  void set_dict_weights(SGMatrix<float64_t> dictionary_weights);
220 
226 
233  void best_path_set_segment_ids_mask(int32_t* segment_ids, float64_t* segment_mask, int32_t m);
234 
237 
242  void set_plif_matrices(CPlifMatrix* pm);
243 
244  // best_path result retrieval functions
250 
256 
262 
263 
272  void compute_nbest_paths(int32_t max_num_signals,
273  bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences);
274 
276 
289  int32_t* my_state_seq, int32_t *my_pos_seq,
290  int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals);
291 
292  // additional best_path_trans_deriv functions
297  void set_my_state_seq(int32_t* my_state_seq);
298 
303  void set_my_pos_seq(int32_t* my_pos_seq);
304 
312  void get_path_scores(float64_t** my_scores, int32_t* seq_len);
313 
321  void get_path_losses(float64_t** my_losses, int32_t* seq_len);
322 
323 
325  inline T_STATES get_N() const
326  {
327  return m_N ;
328  }
329 
334  inline void set_q(T_STATES offset, float64_t value)
335  {
336  m_end_state_distribution_q[offset]=value;
337  }
338 
343  inline void set_p(T_STATES offset, float64_t value)
344  {
345  m_initial_state_distribution_p[offset]=value;
346  }
347 
354  inline void set_a(T_STATES line_, T_STATES column, float64_t value)
355  {
356  m_transition_matrix_a.element(line_,column)=value; // look also best_path!
357  }
358 
364  inline float64_t get_q(T_STATES offset) const
365  {
366  return m_end_state_distribution_q[offset];
367  }
368 
374  inline float64_t get_q_deriv(T_STATES offset) const
375  {
376  return m_end_state_distribution_q_deriv[offset];
377  }
378 
384  inline float64_t get_p(T_STATES offset) const
385  {
386  return m_initial_state_distribution_p[offset];
387  }
388 
394  inline float64_t get_p_deriv(T_STATES offset) const
395  {
397  }
398 
403 
410  inline float64_t* get_lin_feat(int32_t & dim1, int32_t & dim2)
411  {
412  m_lin_feat.get_array_size(dim1, dim2);
413  return m_lin_feat.get_array();
414  }
423  inline void set_lin_feat(float64_t* p_lin_feat, int32_t p_num_svms, int32_t p_seq_len)
424  {
425  m_lin_feat.set_array(p_lin_feat, p_num_svms, p_seq_len, true, true);
426  }
431  void create_word_string();
432 
435  void precompute_stop_codons();
436 
443  inline float64_t get_a(T_STATES line_, T_STATES column) const
444  {
445  return m_transition_matrix_a.element(line_, column); // look also best_path()!
446  }
447 
454  inline float64_t get_a_deriv(T_STATES line_, T_STATES column) const
455  {
456  return m_transition_matrix_a_deriv.element(line_, column); // look also best_path()!
457  }
459 
464  void set_intron_list(CIntronList* intron_list, int32_t num_plifs);
465 
468  {
469  return m_seg_loss_obj;
470  }
471 
478  void long_transition_settings(bool use_long_transitions, int32_t threshold, int32_t max_len)
479  {
480  m_long_transitions = use_long_transitions;
481  m_long_transition_threshold = threshold;
482  SG_DEBUG("ignoring max_len\n")
483  //m_long_transition_max = max_len;
484  }
485 
486 protected:
487 
488  /* helper functions */
489 
499  void lookup_content_svm_values(const int32_t from_state,
500  const int32_t to_state, const int32_t from_pos, const int32_t to_pos,
501  float64_t* svm_values, int32_t frame);
502 
510  inline void lookup_tiling_plif_values(const int32_t from_state,
511  const int32_t to_state, const int32_t len, float64_t* svm_values);
512 
517  inline int32_t find_frame(const int32_t from_state);
518 
527  inline int32_t raw_intensities_interval_query(
528  const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type);
529 
530 #ifndef DOXYGEN_SHOULD_SKIP_THIS
531 
532  struct svm_values_struct
533  {
535  int32_t maxlookback;
537  int32_t seqlen;
538 
540  int32_t* start_pos;
542  float64_t ** svm_values_unnormalized;
544  float64_t * svm_values;
546  bool *** word_used;
548  int32_t **num_unique_words;
549  };
550 #endif // DOXYGEN_SHOULD_SKIP_THIS
551 
560  bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to);
561 
563  virtual const char* get_name() const { return "DynProg"; }
564 
565 private:
566 
567  T_STATES trans_list_len;
568  T_STATES **trans_list_forward;
569  T_STATES *trans_list_forward_cnt;
570  float64_t **trans_list_forward_val;
571  int32_t **trans_list_forward_id;
572  bool mem_initialized;
573 
574 #ifdef DYNPROG_TIMING
575  CTime MyTime;
576  CTime MyTime2;
577  CTime MyTime3;
578 
579  float64_t segment_init_time;
580  float64_t segment_pos_time;
581  float64_t segment_clean_time;
582  float64_t segment_extend_time;
583  float64_t orf_time;
584  float64_t content_time;
585  float64_t content_penalty_time;
586  float64_t content_svm_values_time ;
587  float64_t content_plifs_time ;
588  float64_t svm_init_time;
589  float64_t svm_pos_time;
590  float64_t inner_loop_time;
591  float64_t inner_loop_max_time ;
592  float64_t svm_clean_time;
593  float64_t long_transition_time ;
594 #endif
595 
596 
597 protected:
602 
603  int32_t m_N;
604 
609 
613 
617 
619 
621  int32_t m_num_degrees;
623  int32_t m_num_svms;
624 
647 
649 // CDynamicArray<int32_t> m_svm_pos_start;
655  int32_t m_max_a_id;
656 
657  // input arguments
663  int32_t m_seq_len;
686  uint16_t*** m_wordstr;
703 
707 
708  // output arguments
715 
722 
727 
731 
734 
740 
744  int32_t* m_probe_pos;
750  int32_t m_num_raw_data;
751 
761  //int32_t m_long_transition_max ;
762 
766  static int32_t word_degree_default[4];
767 
771  static int32_t cum_num_words_default[5];
772 
775  static int32_t frame_plifs[3];
776 
779  static int32_t num_words_default[4];
780 
782  static int32_t mod_words_default[32];
783 
785  static bool sign_words_default[16];
786 
788  static int32_t string_words_default[16];
789 };
790 }
791 #endif

SHOGUN Machine Learning Toolbox - Documentation