SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
DynProg.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2007 Soeren Sonnenburg
8  * Written (W) 1999-2007 Gunnar Raetsch
9  * Written (W) 2008-2009 Jonas Behr
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
15 #include <shogun/io/SGIO.h>
16 #include <shogun/lib/config.h>
19 #include <shogun/structure/Plif.h>
21 
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <time.h>
25 #include <ctype.h>
26 #include <limits.h>
27 
28 using namespace shogun;
29 
30 //#define USE_TMP_ARRAYCLASS
31 //#define DYNPROG_DEBUG
32 
33 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ;
34 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ;
35 int32_t CDynProg::frame_plifs[3]={4,5,6};
36 int32_t CDynProg::num_words_default[4]= {64,256,1024,4096} ;
37 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1,
38  1,1,1,1,1,1,1,1,
39  0,0,0,0,0,0,0,0,
40  0,0,0,0,0,0,0,0} ;
41 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true,
42  false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts
43 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0,
44  1,1,1,1,1,1,1,1} ; // which string should be used
45 
46 CDynProg::CDynProg(int32_t num_svms /*= 8 */)
47 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
48  m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
49  m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
50  m_end_state_distribution_q_deriv(1),
51 
52  // multi svm
53  m_num_degrees(4),
54  m_num_svms(num_svms),
55  m_word_degree(word_degree_default, m_num_degrees, true, true),
56  m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
57  m_cum_num_words_array(m_cum_num_words.get_array()),
58  m_num_words(num_words_default, m_num_degrees, true, true),
59  m_num_words_array(m_num_words.get_array()),
60  m_mod_words(mod_words_default, m_num_svms, 2, true, true),
61  m_mod_words_array(m_mod_words.get_array()),
62  m_sign_words(sign_words_default, m_num_svms, true, true),
63  m_sign_words_array(m_sign_words.get_array()),
64  m_string_words(string_words_default, m_num_svms, true, true),
65  m_string_words_array(m_string_words.get_array()),
66  //m_svm_pos_start(m_num_degrees),
67  m_num_unique_words(m_num_degrees),
68  m_svm_arrays_clean(true),
69 
70  m_max_a_id(0), m_observation_matrix(1,1,1),
71  m_pos(1),
72  m_seq_len(0),
73  m_orf_info(1,2),
74  m_plif_list(1),
75  m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2),
76  m_segment_ids(1),
77  m_segment_mask(1),
78  m_my_state_seq(1),
79  m_my_pos_seq(1),
80  m_my_scores(1),
81  m_my_losses(1),
82  m_scores(1),
83  m_states(1,1),
84  m_positions(1,1),
85 
86  m_seq_sparse1(NULL),
87  m_seq_sparse2(NULL),
88  m_plif_matrices(NULL),
89 
90  m_genestr_stop(1),
91  m_intron_list(NULL),
92  m_num_intron_plifs(0),
93  m_lin_feat(1,1), //by Jonas
94  m_raw_intensities(NULL),
95  m_probe_pos(NULL),
96  m_num_probes_cum(NULL),
97  m_num_lin_feat_plifs_cum(NULL),
98  m_num_raw_data(0),
99 
100  m_long_transitions(true),
101  m_long_transition_threshold(1000)
102 {
103  trans_list_forward = NULL ;
104  trans_list_forward_cnt = NULL ;
105  trans_list_forward_val = NULL ;
106  trans_list_forward_id = NULL ;
107  trans_list_len = 0 ;
108 
109  mem_initialized = true ;
110 
111  m_N=1;
112 
113  m_raw_intensities = NULL;
114  m_probe_pos = NULL;
115  m_num_probes_cum = SG_MALLOC(int32_t, 100);
116  m_num_probes_cum[0] = 0;
117  //m_use_tiling=false;
118  m_num_lin_feat_plifs_cum = SG_MALLOC(int32_t, 100);
120  m_num_raw_data = 0;
122 }
123 
125 {
126  if (trans_list_forward_cnt)
127  SG_FREE(trans_list_forward_cnt);
128  if (trans_list_forward)
129  {
130  for (int32_t i=0; i<trans_list_len; i++)
131  {
132  if (trans_list_forward[i])
133  SG_FREE(trans_list_forward[i]);
134  }
135  SG_FREE(trans_list_forward);
136  }
137  if (trans_list_forward_val)
138  {
139  for (int32_t i=0; i<trans_list_len; i++)
140  {
141  if (trans_list_forward_val[i])
142  SG_FREE(trans_list_forward_val[i]);
143  }
144  SG_FREE(trans_list_forward_val);
145  }
146  if (trans_list_forward_id)
147  {
148  for (int32_t i=0; i<trans_list_len; i++)
149  {
150  if (trans_list_forward_id[i])
151  SG_FREE(trans_list_forward_id[i]);
152  }
153  SG_FREE(trans_list_forward_id);
154  }
155  if (m_raw_intensities)
156  SG_FREE(m_raw_intensities);
157  if (m_probe_pos)
158  SG_FREE(m_probe_pos);
159  if (m_num_probes_cum)
160  SG_FREE(m_num_probes_cum);
162  SG_FREE(m_num_lin_feat_plifs_cum);
163 
164  delete m_intron_list;
165 
170 }
171 
174 {
175  return m_num_svms;
176 }
177 
179 {
180  int32_t length=m_genestr.get_dim1();
181 
182  m_genestr_stop.resize_array(length) ;
184  {
185  for (int32_t i=0; i<length-2; i++)
186  if ((m_genestr[i]=='t' || m_genestr[i]=='T') &&
187  (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') &&
188  (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) ||
189  ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') )))
190  {
191  m_genestr_stop.element(i)=true ;
192  }
193  else
194  m_genestr_stop.element(i)=false ;
195  m_genestr_stop.element(length-2)=false ;
196  m_genestr_stop.element(length-1)=false ;
197  }
198 }
199 
200 void CDynProg::set_num_states(int32_t p_N)
201 {
202  m_N=p_N ;
203 
211 
213 }
214 
216 {
217  return m_N;
218 }
219 
221  int32_t* probe_pos, float64_t* intensities, const int32_t num_probes)
222 {
223  m_num_raw_data++;
225 
226  int32_t* tmp_probe_pos = SG_MALLOC(int32_t, m_num_probes_cum[m_num_raw_data]);
227  float64_t* tmp_raw_intensities = SG_MALLOC(float64_t, m_num_probes_cum[m_num_raw_data]);
228 
229 
230  if (m_num_raw_data==1){
231  sg_memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
232  sg_memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
233  //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2)
234  }else{
235  sg_memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t));
236  sg_memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));
237  sg_memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));
238  sg_memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));
239  }
240  SG_FREE(m_probe_pos);
241  SG_FREE(m_raw_intensities);
242  m_probe_pos = tmp_probe_pos; //SG_MALLOC(int32_t, num_probes);
243  m_raw_intensities = tmp_raw_intensities;//SG_MALLOC(float64_t, num_probes);
244 
245  //sg_memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t));
246  //sg_memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t));
247 
248 }
249 
250 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms)
251 {
252  m_lin_feat.resize_array(p_num_svms, m_seq_len);
253 
254  // initialize array
255  for (int s=0; s<p_num_svms; s++)
256  for (int p=0; p<m_seq_len; p++)
257  m_lin_feat.set_element(0.0, s, p) ;
258 }
259 
260 void CDynProg::resize_lin_feat(const int32_t num_new_feat)
261 {
262  int32_t dim1, dim2;
263  m_lin_feat.get_array_size(dim1, dim2);
265  ASSERT(dim2==m_seq_len) // == number of candidate positions
266 
267 
268 
269  float64_t* arr = m_lin_feat.get_array();
270  float64_t* tmp = SG_MALLOC(float64_t, (dim1+num_new_feat)*dim2);
271  memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
272  for(int32_t j=0;j<m_seq_len;j++)
273  for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
274  tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
275 
276  m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);// copy array and free it later
277  SG_FREE(tmp);
278 
279  /*for(int32_t j=0;j<5;j++)
280  {
281  for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++)
282  {
283  SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j))
284  }
285  SG_PRINT("\n")
286  }
287  m_lin_feat.get_array_size(dim1,dim2);
288  SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2)*/
289 
290  //SG_PRINT("resize_lin_feat: done\n")
291 }
292 
294  CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs)
295 {
297  float64_t* tiling_plif = SG_MALLOC(float64_t, num_tiling_plifs);
300  svm_value[i]=0.0;
301  int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs);
302  for (int32_t i=0; i<num_tiling_plifs; i++)
303  {
304  tiling_plif[i]=0.0;
305  CPlif * plif = PEN[tiling_plif_ids[i]];
306  tiling_rows[i] = plif->get_use_svm();
307 
308  ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
309  }
310  resize_lin_feat(num_tiling_plifs);
311 
312 
313  int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
314  float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
315  int32_t num=m_num_probes_cum[m_num_raw_data-1];
316 
317  for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++)
318  {
319  while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx])
320  {
321  for (int32_t i=0; i<num_tiling_plifs; i++)
322  {
323  svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
324  CPlif * plif = PEN[tiling_plif_ids[i]];
325  ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1)
326  plif->set_do_calc(true);
327  tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
328  plif->set_do_calc(false);
329  }
330  p_tiling_data++;
331  p_tiling_pos++;
332  num++;
333  }
334  for (int32_t i=0; i<num_tiling_plifs; i++)
335  m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
336  }
337  SG_FREE(svm_value);
338  SG_FREE(tiling_plif);
339  SG_FREE(tiling_rows);
340 }
341 
343 {
344  SG_FREE(m_wordstr);
345  m_wordstr=SG_MALLOC(uint16_t**, 5440);
346  int32_t k=0;
347  int32_t genestr_len=m_genestr.get_dim1();
348 
349  m_wordstr[k]=SG_MALLOC(uint16_t*, m_num_degrees);
350  for (int32_t j=0; j<m_num_degrees; j++)
351  {
352  m_wordstr[k][j]=NULL ;
353  {
354  m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len);
355  for (int32_t i=0; i<genestr_len; i++)
356  switch (m_genestr[i])
357  {
358  case 'A':
359  case 'a': m_wordstr[k][j][i]=0 ; break ;
360  case 'C':
361  case 'c': m_wordstr[k][j][i]=1 ; break ;
362  case 'G':
363  case 'g': m_wordstr[k][j][i]=2 ; break ;
364  case 'T':
365  case 't': m_wordstr[k][j][i]=3 ; break ;
366  default: ASSERT(0)
367  }
369  }
370  }
371 }
372 
374 {
375  for (int32_t s=0; s<m_num_svms; s++)
376  m_lin_feat.set_element(0.0, s, 0);
377 
378  for (int32_t p=0 ; p<m_seq_len-1 ; p++)
379  {
380  int32_t from_pos = m_pos[p];
381  int32_t to_pos = m_pos[p+1];
382  float64_t* my_svm_values_unnormalized = SG_MALLOC(float64_t, m_num_svms);
383  //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos)
384 
385  ASSERT(from_pos<=m_genestr.get_dim1())
386  ASSERT(to_pos<=m_genestr.get_dim1())
387 
388  for (int32_t s=0; s<m_num_svms; s++)
389  my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p);
390 
391  for (int32_t i=from_pos; i<to_pos; i++)
392  {
393  for (int32_t j=0; j<m_num_degrees; j++)
394  {
395  uint16_t word = m_wordstr[0][j][i] ;
396  for (int32_t s=0; s<m_num_svms; s++)
397  {
398  // check if this k-mer should be considered for this SVM
399  if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1))
400  continue;
401  my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ;
402  }
403  }
404  }
405  for (int32_t s=0; s<m_num_svms; s++)
406  {
407  float64_t prev = m_lin_feat.get_element(s, p);
408  //SG_PRINT("elem (%i, %i, %f)\n", s, p, prev)
409  if (prev<-1e20 || prev>1e20)
410  {
411  SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev)
412  prev=0 ;
413  }
414  m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1);
415  }
416  SG_FREE(my_svm_values_unnormalized);
417  }
418  //for (int32_t j=0; j<m_num_degrees; j++)
419  // SG_FREE(m_wordstr[0][j]);
420  //SG_FREE(m_wordstr[0]);
421 }
422 
424 {
425  if (!(p.vlen==m_N))
426  SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.vlen, m_N)
427 
429 }
430 
432 {
433  if (!(q.vlen==m_N))
434  SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.vlen, m_N)
436 }
437 
439 {
440  ASSERT(a.num_cols==m_N)
441  ASSERT(a.num_rows==m_N)
442  m_transition_matrix_a.set_array(a.matrix, m_N, m_N, true, true);
444 }
445 
447 {
448  ASSERT(a.num_cols==m_N)
449  ASSERT(a.num_rows==m_N)
451  m_max_a_id = 0;
452  for (int32_t i=0; i<m_N; i++)
453  {
454  for (int32_t j=0; j<m_N; j++)
456  }
457 }
458 
460 {
461  int32_t num_trans=a_trans.num_rows;
462  int32_t num_cols=a_trans.num_cols;
463 
464  //CMath::display_matrix(a_trans.matrix,num_trans, num_cols,"a_trans");
465 
466  if (!((num_cols==3) || (num_cols==4)))
467  SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols)
468 
469  SG_FREE(trans_list_forward);
470  SG_FREE(trans_list_forward_cnt);
471  SG_FREE(trans_list_forward_val);
472  SG_FREE(trans_list_forward_id);
473 
474  trans_list_forward = NULL ;
475  trans_list_forward_cnt = NULL ;
476  trans_list_forward_val = NULL ;
477  trans_list_len = 0 ;
478 
481 
482  mem_initialized = true ;
483 
484  trans_list_forward_cnt=NULL ;
485  trans_list_len = m_N ;
486  trans_list_forward = SG_MALLOC(T_STATES*, m_N);
487  trans_list_forward_cnt = SG_MALLOC(T_STATES, m_N);
488  trans_list_forward_val = SG_MALLOC(float64_t*, m_N);
489  trans_list_forward_id = SG_MALLOC(int32_t*, m_N);
490 
491  int32_t start_idx=0;
492  for (int32_t j=0; j<m_N; j++)
493  {
494  int32_t old_start_idx=start_idx;
495 
496  while (start_idx<num_trans && a_trans.matrix[start_idx+num_trans]==j)
497  {
498  start_idx++;
499 
500  if (start_idx>1 && start_idx<num_trans)
501  ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans])
502  }
503 
504  if (start_idx>1 && start_idx<num_trans)
505  ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans])
506 
507  int32_t len=start_idx-old_start_idx;
508  ASSERT(len>=0)
509 
510  trans_list_forward_cnt[j] = 0 ;
511 
512  if (len>0)
513  {
514  trans_list_forward[j] = SG_MALLOC(T_STATES, len);
515  trans_list_forward_val[j] = SG_MALLOC(float64_t, len);
516  trans_list_forward_id[j] = SG_MALLOC(int32_t, len);
517  }
518  else
519  {
520  trans_list_forward[j] = NULL;
521  trans_list_forward_val[j] = NULL;
522  trans_list_forward_id[j] = NULL;
523  }
524  }
525 
526  for (int32_t i=0; i<num_trans; i++)
527  {
528  int32_t from_state = (int32_t)a_trans.matrix[i] ;
529  int32_t to_state = (int32_t)a_trans.matrix[i+num_trans] ;
530  float64_t val = a_trans.matrix[i+num_trans*2] ;
531  int32_t id = 0 ;
532  if (num_cols==4)
533  id = (int32_t)a_trans.matrix[i+num_trans*3] ;
534  //SG_DEBUG("id=%i\n", id)
535 
536  ASSERT(to_state>=0 && to_state<m_N)
537  ASSERT(from_state>=0 && from_state<m_N)
538 
539  trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
540  trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
541  trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
542  trans_list_forward_cnt[to_state]++ ;
543  m_transition_matrix_a.element(from_state, to_state) = val ;
544  m_transition_matrix_a_id.element(from_state, to_state) = id ;
545  //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,m_transition_matrix_a_id.element(from_state, to_state))
546  } ;
547 
548  m_max_a_id = 0 ;
549  for (int32_t i=0; i<m_N; i++)
550  for (int32_t j=0; j<m_N; j++)
551  {
552  //if (m_transition_matrix_a_id.element(i,j))
553  //SG_DEBUG("(%i,%i)=%i\n", i,j, m_transition_matrix_a_id.element(i,j))
555  }
556  //SG_DEBUG("m_max_a_id=%i\n", m_max_a_id)
557 }
558 
559 
561 {
562  //for (int32_t i=0; i<mod_words_input.num_cols; i++)
563  //{
564  // for (int32_t j=0; j<mod_words_input.num_rows; j++)
565  // SG_PRINT("%i ",mod_words_input[i*mod_words_input.num_rows+j])
566  // SG_PRINT("\n")
567  //}
568  m_svm_arrays_clean=false ;
569 
570  ASSERT(m_num_svms==mod_words_input.num_rows)
571  ASSERT(mod_words_input.num_cols==2)
572 
573  m_mod_words.set_array(mod_words_input.matrix, mod_words_input.num_rows, 2, true, true) ;
575 
576  /*SG_DEBUG("m_mod_words=[")
577  for (int32_t i=0; i<mod_words_input.num_rows; i++)
578  SG_DEBUG("%i, ", p_mod_words_array[i])
579  SG_DEBUG("]\n") */
580 }
581 
583 {
584  //SG_DEBUG("wd_dim1=%d, m_cum_num_words=%d, m_num_words=%d, m_svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n m_num_degrees=%d, m_num_svms=%d, m_num_strings=%d", m_word_degree.get_dim1(), m_cum_num_words.get_dim1(), m_num_words.get_dim1(), m_svm_pos_start.get_dim1(), m_num_unique_words.get_dim1(), m_mod_words.get_dim1(), m_mod_words.get_dim2(), m_sign_words.get_dim1(), m_string_words.get_dim1(), m_num_degrees, m_num_svms, m_num_strings)
588  //(word_used.get_dim1()==m_num_degrees) &&
589  //(word_used.get_dim2()==m_num_words[m_num_degrees-1]) &&
590  //(word_used.get_dim3()==m_num_strings) &&
591  // (svm_values_unnormalized.get_dim1()==m_num_degrees) &&
592  // (svm_values_unnormalized.get_dim2()==m_num_svms) &&
593  //(m_svm_pos_start.get_dim1()==m_num_degrees) &&
596  (m_mod_words.get_dim2()==2) &&
599  {
600  m_svm_arrays_clean=true ;
601  return true ;
602  }
603  else
604  {
607  (m_mod_words.get_dim2()==2) &&
610  SG_PRINT("OK\n")
611  else
612  SG_PRINT("not OK\n")
613 
615  SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees")
617  SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1")
619  SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees")
620  //if (!(m_svm_pos_start.get_dim1()==m_num_degrees))
621  // SG_WARNING("SVM array: m_svm_pos_start.get_dim1()!=m_num_degrees")
623  SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees")
624  if (!(m_mod_words.get_dim1()==m_num_svms))
625  SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms")
626  if (!(m_mod_words.get_dim2()==2))
627  SG_WARNING("SVM array: m_mod_words.get_dim2()!=2")
628  if (!(m_sign_words.get_dim1()==m_num_svms))
629  SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms")
631  SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms")
632 
633  m_svm_arrays_clean=false ;
634  return false ;
635  }
636 }
637 
639 {
640  if (seq.num_dims!=3)
641  SG_ERROR("Expected 3-dimensional Matrix\n")
642 
643  int32_t N=seq.dims[0];
644  int32_t cand_pos=seq.dims[1];
645  int32_t max_num_features=seq.dims[2];
646 
647  if (!m_svm_arrays_clean)
648  {
649  SG_ERROR("SVM arrays not clean")
650  return ;
651  } ;
652 
653  ASSERT(N==m_N)
654  ASSERT(cand_pos==m_seq_len)
657 
658  m_observation_matrix.set_array(seq.array, N, m_seq_len, max_num_features, true, true) ;
659 }
661 {
662  return m_seq_len;
663 }
664 
666 {
667  ASSERT(seg_path.num_rows==2)
668  ASSERT(seg_path.num_cols==m_seq_len)
669 
670  if (seg_path.matrix!=NULL)
671  {
672  int32_t *segment_ids = SG_MALLOC(int32_t, m_seq_len);
673  float64_t *segment_mask = SG_MALLOC(float64_t, m_seq_len);
674  for (int32_t i=0; i<m_seq_len; i++)
675  {
676  segment_ids[i] = (int32_t)seg_path.matrix[2*i] ;
677  segment_mask[i] = seg_path.matrix[2*i+1] ;
678  }
679  best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ;
680  SG_FREE(segment_ids);
681  SG_FREE(segment_mask);
682  }
683  else
684  {
685  int32_t *izeros = SG_MALLOC(int32_t, m_seq_len);
686  float64_t *dzeros = SG_MALLOC(float64_t, m_seq_len);
687  for (int32_t i=0; i<m_seq_len; i++)
688  {
689  izeros[i]=0 ;
690  dzeros[i]=0.0 ;
691  }
692  best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ;
693  SG_FREE(izeros);
694  SG_FREE(dzeros);
695  }
696 }
697 
699 {
700  m_pos.set_array(pos.vector, pos.vlen, true, true) ;
701  m_seq_len = pos.vlen;
702 }
703 
705 {
706  if (orf_info.num_cols!=2)
707  SG_ERROR("orf_info size incorrect %i!=2\n", orf_info.num_cols)
708 
709  m_orf_info.set_array(orf_info.matrix, orf_info.num_rows, orf_info.num_cols, true, true) ;
710 }
711 
713 {
714  if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
715  SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n")
716 
719 
720  m_seq_sparse1=seq_sparse1;
721  m_seq_sparse2=seq_sparse2;
724 }
725 
727 {
729 
730  m_plif_matrices=pm;
731 
733 }
734 
736 {
737  ASSERT(genestr.vector)
738  ASSERT(genestr.vlen>0)
739 
740  m_genestr.set_array(genestr.vector, genestr.vlen, true, true) ;
741 }
742 
743 void CDynProg::set_my_state_seq(int32_t* my_state_seq)
744 {
745  ASSERT(my_state_seq && m_seq_len>0)
747  for (int32_t i=0; i<m_seq_len; i++)
748  m_my_state_seq[i]=my_state_seq[i];
749 }
750 
751 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq)
752 {
753  ASSERT(my_pos_seq && m_seq_len>0)
755  for (int32_t i=0; i<m_seq_len; i++)
756  m_my_pos_seq[i]=my_pos_seq[i];
757 }
758 
760 {
761  if (m_num_svms!=dictionary_weights.num_cols)
762  {
763  SG_ERROR("m_dict_weights array does not match num_svms=%i!=%i\n",
764  m_num_svms, dictionary_weights.num_cols) ;
765  }
766 
767  m_dict_weights.set_array(dictionary_weights.matrix, dictionary_weights.num_rows, m_num_svms, true, true) ;
768 
769  // initialize, so it does not bother when not used
776 }
777 
779 {
780  int32_t m=segment_loss.num_rows;
781  int32_t n=segment_loss.num_cols;
782  // here we need two matrices. Store it in one: 2N x N
783  if (2*m!=n)
784  SG_ERROR("segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n)
785 
786  if (m!=m_max_a_id+1)
787  SG_ERROR("segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1)
788 
789  m_segment_loss.set_array(segment_loss.matrix, m, n/2, 2, true, true) ;
790  /*for (int32_t i=0; i<n; i++)
791  for (int32_t j=0; j<n; j++)
792  SG_DEBUG("loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) */
793 }
794 
796  int32_t* segment_ids, float64_t* segment_mask, int32_t m)
797 {
798 
800  SG_ERROR("size of segment_ids or segment_mask (%i) does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2())
801  int32_t max_id = 0;
802  for (int32_t i=1;i<m;i++)
803  max_id = CMath::max(max_id,segment_ids[i]);
804  //SG_PRINT("max_id: %i, m:%i\n",max_id, m)
805  m_segment_ids.set_array(segment_ids, m, true, true) ;
806  m_segment_mask.set_array(segment_mask, m, true, true) ;
807 
811 }
812 
814 {
816  sg_memcpy(scores.vector,m_scores.get_array(), sizeof(float64_t)*(m_scores.get_dim1()));
817 
818  return scores;
819 }
820 
822 {
824 
825  int32_t sz = sizeof(int32_t)*( m_states.get_dim1() * m_states.get_dim2() );
826  sg_memcpy(states.matrix ,m_states.get_array(),sz);
827 
828  return states;
829 }
830 
832 {
834 
835  int32_t sz = sizeof(int32_t)*(m_positions.get_dim1()*m_positions.get_dim2());
836  sg_memcpy(positions.matrix, m_positions.get_array(),sz);
837 
838  return positions;
839 }
840 
841 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len)
842 {
843  ASSERT(scores && seq_len)
844 
845  *seq_len=m_my_scores.get_dim1();
846 
847  int32_t sz = sizeof(float64_t)*(*seq_len);
848 
849  *scores = SG_MALLOC(float64_t, *seq_len);
850  ASSERT(*scores)
851 
852  sg_memcpy(*scores,m_my_scores.get_array(),sz);
853 }
854 
855 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len)
856 {
857  ASSERT(losses && seq_len)
858 
859  *seq_len=m_my_losses.get_dim1();
860 
861  int32_t sz = sizeof(float64_t)*(*seq_len);
862 
863  *losses = SG_MALLOC(float64_t, *seq_len);
864  ASSERT(*losses)
865 
866  sg_memcpy(*losses,m_my_losses.get_array(),sz);
867 }
868 
870 
872  int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
873  int32_t to)
874 {
875 #ifdef DYNPROG_TIMING_DETAIL
876  MyTime.start() ;
877 #endif
878 
879  if (start<0)
880  start=0 ;
881  if (to<0)
882  to=0 ;
883 
884  int32_t orf_target = orf_to-orf_from ;
885  if (orf_target<0) orf_target+=3 ;
886 
887  int32_t pos ;
888  if (last_pos==to)
889  pos = to-orf_to-3 ;
890  else
891  pos=last_pos ;
892 
893  if (pos<0)
894  {
895 #ifdef DYNPROG_TIMING_DETAIL
896  MyTime.stop() ;
897  orf_time += MyTime.time_diff_sec() ;
898 #endif
899  return true ;
900  }
901 
902  for (; pos>=start; pos-=3)
903  if (m_genestr_stop[pos])
904  {
905 #ifdef DYNPROG_TIMING_DETAIL
906  MyTime.stop() ;
907  orf_time += MyTime.time_diff_sec() ;
908 #endif
909  return false ;
910  }
911 
912 
913  last_pos = CMath::min(pos+3,to-orf_to-3) ;
914 
915 #ifdef DYNPROG_TIMING_DETAIL
916  MyTime.stop() ;
917  orf_time += MyTime.time_diff_sec() ;
918 #endif
919  return true ;
920 }
921 
922 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf,
923  int16_t nbest, bool with_loss, bool with_multiple_sequences)
924  {
925 
926  //FIXME we need checks here if all the fields are of right size
927  //SG_PRINT("m_seq_len: %i\n", m_seq_len)
928  //SG_PRINT("m_pos[0]: %i\n", m_pos[0])
929  //SG_PRINT("\n")
930 
931  //FIXME these variables can go away when compute_nbest_paths uses them
932  //instead of the local pointers below
933  const float64_t* seq_array = m_observation_matrix.get_array();
934  m_scores.resize_array(nbest) ;
937 
938  for (int32_t i=0; i<nbest; i++)
939  {
940  m_scores[i]=-1;
941  for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++)
942  {
943  m_states.element(i,j)=-1;
944  m_positions.element(i,j)=-1;
945  }
946  }
947  float64_t* prob_nbest=m_scores.get_array();
948  int32_t* my_state_seq=m_states.get_array();
949  int32_t* my_pos_seq=m_positions.get_array();
950 
951  CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
952  CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
953  //END FIXME
954 
955 
956 #ifdef DYNPROG_TIMING
957  segment_init_time = 0.0 ;
958  segment_pos_time = 0.0 ;
959  segment_extend_time = 0.0 ;
960  segment_clean_time = 0.0 ;
961  orf_time = 0.0 ;
962  svm_init_time = 0.0 ;
963  svm_pos_time = 0.0 ;
964  svm_clean_time = 0.0 ;
965  inner_loop_time = 0.0 ;
966  content_svm_values_time = 0.0 ;
967  content_plifs_time = 0.0 ;
968  inner_loop_max_time = 0.0 ;
969  long_transition_time = 0.0 ;
970 
971  MyTime2.start() ;
972 #endif
973 
974  if (!m_svm_arrays_clean)
975  {
976  SG_ERROR("SVM arrays not clean")
977  return ;
978  }
979 
980 #ifdef DYNPROG_DEBUG
985  //SG_PRINT("use_orf = %i\n", use_orf)
986 #endif
987 
988  int32_t max_look_back = 1000 ;
989  bool use_svm = false ;
990 
991  SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals)
992 
993  //for (int32_t i=0;i<m_N*m_seq_len*max_num_signals;i++)
994  // SG_PRINT("(%i)%0.2f ",i,seq_array[i])
995 
996  CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase*
997 
998  CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d, CPlifBase*
999 
1000  CDynamicArray<float64_t> seq(m_N, m_seq_len) ; // 2d
1001  seq.set_const(0) ;
1002 
1003 #ifdef DYNPROG_DEBUG
1004  SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data)
1005  SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs)
1006  SG_PRINT("m_num_svms: %i\n", m_num_svms)
1007  SG_PRINT("m_num_lin_feat_plifs_cum: ")
1008  for (int i=0; i<=m_num_raw_data; i++)
1010  SG_PRINT("\n")
1011 #endif
1012 
1014  { // initialize svm_svalue
1016  svm_value[s]=0 ;
1017  }
1018 
1019  { // convert seq_input to seq
1020  // this is independent of the svm values
1021 
1022  CDynamicArray<float64_t> *seq_input=NULL ; // 3d
1023  if (seq_array!=NULL)
1024  {
1025  //SG_PRINT("using dense seq_array\n")
1026 
1027  seq_input=new CDynamicArray<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ;
1028  //seq_input.display_array() ;
1029 
1030  ASSERT(m_seq_sparse1==NULL)
1031  ASSERT(m_seq_sparse2==NULL)
1032  } else
1033  {
1034  SG_PRINT("using sparse seq_array\n")
1035 
1036  ASSERT(m_seq_sparse1!=NULL)
1037  ASSERT(m_seq_sparse2!=NULL)
1038  ASSERT(max_num_signals==2)
1039  }
1040 
1041  for (int32_t i=0; i<m_N; i++)
1042  for (int32_t j=0; j<m_seq_len; j++)
1043  seq.element(i,j) = 0 ;
1044 
1045  for (int32_t i=0; i<m_N; i++)
1046  for (int32_t j=0; j<m_seq_len; j++)
1047  for (int32_t k=0; k<max_num_signals; k++)
1048  {
1049  if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
1050  {
1051  // no plif
1052  if (seq_input!=NULL)
1053  seq.element(i,j) = seq_input->element(i,j,k) ;
1054  else
1055  {
1056  if (k==0)
1057  seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ;
1058  if (k==1)
1059  seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ;
1060  }
1061  break ;
1062  }
1063  if (PEN_state_signals.element(i,k)!=NULL)
1064  {
1065  if (seq_input!=NULL)
1066  {
1067  // just one plif
1068  if (CMath::is_finite(seq_input->element(i,j,k)))
1069  seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(seq_input->element(i,j,k), svm_value) ;
1070  else
1071  // keep infinity values
1072  seq.element(i,j) = seq_input->element(i, j, k) ;
1073  }
1074  else
1075  {
1076  if (k==0)
1077  {
1078  // just one plif
1080  seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ;
1081  else
1082  // keep infinity values
1083  seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ;
1084  }
1085  if (k==1)
1086  {
1087  // just one plif
1089  seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ;
1090  else
1091  // keep infinity values
1092  seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ;
1093  }
1094  }
1095  }
1096  else
1097  break ;
1098  }
1099  delete seq_input;
1100  SG_FREE(svm_value);
1101  }
1102 
1103  // allow longer transitions than look_back
1104  bool long_transitions = m_long_transitions ;
1105  CDynamicArray<int32_t> long_transition_content_start_position(m_N,m_N) ; // 2d
1106 #ifdef DYNPROG_DEBUG
1107  CDynamicArray<int32_t> long_transition_content_end_position(m_N,m_N) ; // 2d
1108 #endif
1109  CDynamicArray<int32_t> long_transition_content_start(m_N,m_N) ; // 2d
1110 
1111  CDynamicArray<float64_t> long_transition_content_scores(m_N,m_N) ; // 2d
1112 #ifdef DYNPROG_DEBUG
1113 
1114  CDynamicArray<float64_t> long_transition_content_scores_pen(m_N,m_N) ; // 2d
1115 
1116  CDynamicArray<float64_t> long_transition_content_scores_prev(m_N,m_N) ; // 2d
1117 
1118  CDynamicArray<float64_t> long_transition_content_scores_elem(m_N,m_N) ; // 2d
1119 #endif
1120  CDynamicArray<float64_t> long_transition_content_scores_loss(m_N,m_N) ; // 2d
1121 
1122  if (nbest!=1)
1123  {
1124  SG_ERROR("Long transitions are not supported for nbest!=1")
1125  long_transitions = false ;
1126  }
1127  long_transition_content_scores.set_const(-CMath::INFTY);
1128 #ifdef DYNPROG_DEBUG
1129  long_transition_content_scores_pen.set_const(0) ;
1130  long_transition_content_scores_elem.set_const(0) ;
1131  long_transition_content_scores_prev.set_const(0) ;
1132 #endif
1133  if (with_loss)
1134  long_transition_content_scores_loss.set_const(0) ;
1135  long_transition_content_start.set_const(0) ;
1136  long_transition_content_start_position.set_const(0) ;
1137 #ifdef DYNPROG_DEBUG
1138  long_transition_content_end_position.set_const(0) ;
1139 #endif
1140 
1142  { // initialize svm_svalue
1144  svm_value[s]=0 ;
1145  }
1146 
1147  CDynamicArray<int32_t> look_back(m_N,m_N) ; // 2d
1148  //CDynamicArray<int32_t> look_back_orig(m_N,m_N) ;
1149 
1150 
1151  { // determine maximal length of look-back
1152  for (int32_t i=0; i<m_N; i++)
1153  for (int32_t j=0; j<m_N; j++)
1154  {
1155  look_back.set_element(INT_MAX, i, j) ;
1156  //look_back_orig.set_element(INT_MAX, i, j) ;
1157  }
1158 
1159  for (int32_t j=0; j<m_N; j++)
1160  {
1161  // only consider transitions that are actually allowed
1162  const T_STATES num_elem = trans_list_forward_cnt[j] ;
1163  const T_STATES *elem_list = trans_list_forward[j] ;
1164 
1165  for (int32_t i=0; i<num_elem; i++)
1166  {
1167  T_STATES ii = elem_list[i] ;
1168 
1169  CPlifBase *penij=(CPlifBase*) PEN.element(j, ii) ;
1170  if (penij==NULL)
1171  {
1172  if (long_transitions)
1173  {
1174  look_back.set_element(m_long_transition_threshold, j, ii) ;
1175  //look_back_orig.set_element(m_long_transition_max, j, ii) ;
1176  }
1177  continue ;
1178  }
1179 
1180  /* if the transition is an ORF or we do computation with loss, we have to disable long transitions */
1181  if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions))
1182  {
1183  look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
1184  //look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
1185  if (CMath::ceil(penij->get_max_value()) > max_look_back)
1186  {
1187  SG_DEBUG("%d %d -> value: %f\n", ii,j,penij->get_max_value())
1188  max_look_back = (int32_t) (CMath::ceil(penij->get_max_value()));
1189  }
1190  }
1191  else
1192  {
1193  look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ;
1194  //look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ;
1195  }
1196 
1197  if (penij->uses_svm_values())
1198  use_svm=true ;
1199  }
1200  }
1201  /* make sure max_look_back is at least as long as a long transition */
1202  if (long_transitions)
1203  max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ;
1204 
1205  /* make sure max_look_back is not longer than the whole string */
1206  max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ;
1207 
1208  int32_t num_long_transitions = 0 ;
1209  for (int32_t i=0; i<m_N; i++)
1210  for (int32_t j=0; j<m_N; j++)
1211  {
1212  if (look_back.get_element(i,j)==m_long_transition_threshold)
1213  num_long_transitions++ ;
1214  if (look_back.get_element(i,j)==INT_MAX)
1215  {
1216  if (long_transitions)
1217  {
1218  look_back.set_element(m_long_transition_threshold, i, j) ;
1219  //look_back_orig.set_element(m_long_transition_max, i, j) ;
1220  }
1221  else
1222  {
1223  look_back.set_element(max_look_back, i, j) ;
1224  //look_back_orig.set_element(m_long_transition_max, i, j) ;
1225  }
1226  }
1227  }
1228  SG_DEBUG("Using %i long transitions\n", num_long_transitions)
1229  }
1230  //SG_PRINT("max_look_back: %i \n", max_look_back)
1231 
1232  //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr.get_dim1())
1233  SG_DEBUG("use_svm=%i\n", use_svm)
1234 
1235  SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest)
1236  const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ;
1237  SG_DEBUG("look_back_buflen=%i\n", look_back_buflen)
1238  /*const float64_t mem_use = (float64_t)(m_seq_len*m_N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
1239  look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
1240  m_seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
1241  m_genestr.get_dim1()*sizeof(bool))/(1024*1024);*/
1242 
1243  //bool is_big = (mem_use>200) || (m_seq_len>5000) ;
1244 
1245  /*if (is_big)
1246  {
1247  SG_DEBUG("calling compute_nbest_paths: m_seq_len=%i, m_N=%i, lookback=%i nbest=%i\n",
1248  m_seq_len, m_N, max_look_back, nbest) ;
1249  SG_DEBUG("allocating %1.2fMB of memory\n",
1250  mem_use) ;
1251  }*/
1252  ASSERT(nbest<32000)
1253 
1254  CDynamicArray<float64_t> delta(m_seq_len, m_N, nbest) ; // 3d
1255  float64_t* delta_array = delta.get_array() ;
1256  //delta.set_const(0) ;
1257 
1258  CDynamicArray<T_STATES> psi(m_seq_len, m_N, nbest) ; // 3d
1259  //psi.set_const(0) ;
1260 
1261  CDynamicArray<int16_t> ktable(m_seq_len, m_N, nbest) ; // 3d
1262  //ktable.set_const(0) ;
1263 
1264  CDynamicArray<int32_t> ptable(m_seq_len, m_N, nbest) ; // 3d
1265  //ptable.set_const(0) ;
1266 
1267  CDynamicArray<float64_t> delta_end(nbest) ;
1268  //delta_end.set_const(0) ;
1269 
1270  CDynamicArray<T_STATES> path_ends(nbest) ;
1271  //path_ends.set_const(0) ;
1272 
1273  CDynamicArray<int16_t> ktable_end(nbest) ;
1274  //ktable_end.set_const(0) ;
1275 
1276  float64_t * fixedtempvv=SG_MALLOC(float64_t, look_back_buflen);
1277  memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
1278  int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen);
1279  memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
1280 
1281  CDynamicArray<float64_t> oldtempvv(look_back_buflen) ;
1282 
1283  CDynamicArray<float64_t> oldtempvv2(look_back_buflen) ;
1284  //oldtempvv.set_const(0) ;
1285  //oldtempvv.display_size() ;
1286 
1287  CDynamicArray<int32_t> oldtempii(look_back_buflen) ;
1288 
1289  CDynamicArray<int32_t> oldtempii2(look_back_buflen) ;
1290  //oldtempii.set_const(0) ;
1291 
1292  CDynamicArray<T_STATES> state_seq(m_seq_len) ;
1293  //state_seq.set_const(0) ;
1294 
1296  //pos_seq.set_const(0) ;
1297 
1299 
1300 #ifdef DYNPROG_DEBUG
1301  state_seq.display_size() ;
1302  pos_seq.display_size() ;
1303 
1308  //word_used.display_size() ;
1309  //svm_values_unnormalized.display_size() ;
1310  //m_svm_pos_start.display_array() ;
1312 
1313  PEN.display_size() ;
1314  PEN_state_signals.display_size() ;
1315  seq.display_size() ;
1317 
1318  //m_genestr_stop.display_size() ;
1319  delta.display_size() ;
1320  psi.display_size() ;
1321  ktable.display_size() ;
1322  ptable.display_size() ;
1323  delta_end.display_size() ;
1324  path_ends.display_size() ;
1325  ktable_end.display_size() ;
1326 
1327 #ifdef USE_TMP_ARRAYCLASS
1328  fixedtempvv.display_size() ;
1329  fixedtempii.display_size() ;
1330 #endif
1331 
1332  //oldtempvv.display_size() ;
1333  //oldtempii.display_size() ;
1334 
1335  state_seq.display_size() ;
1336  pos_seq.display_size() ;
1337 
1338  //seq.set_const(0) ;
1339 
1340 #endif //DYNPROG_DEBUG
1341 
1343 
1344 
1345 
1346  {
1347  for (int32_t s=0; s<m_num_svms; s++)
1349  }
1350 
1351 
1352  //CDynamicArray<int32_t*> trans_matrix_svms(m_N,m_N); // 2d
1353  //CDynamicArray<int32_t> trans_matrix_num_svms(m_N,m_N); // 2d
1354 
1355  { // initialization
1356 
1357  for (T_STATES i=0; i<m_N; i++)
1358  {
1359  //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution
1360  delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution
1361  psi.element(0,i,0) = 0 ;
1362  if (nbest>1)
1363  ktable.element(0,i,0) = 0 ;
1364  ptable.element(0,i,0) = 0 ;
1365  for (int16_t k=1; k<nbest; k++)
1366  {
1367  int32_t dim1, dim2, dim3 ;
1368  delta.get_array_size(dim1, dim2, dim3) ;
1369  //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3)
1370  //delta.element(0, i, k) = -CMath::INFTY ;
1371  delta.element(delta_array, 0, i, k, m_seq_len, m_N) = -CMath::INFTY ;
1372  psi.element(0,i,0) = 0 ; // <--- what's this for?
1373  if (nbest>1)
1374  ktable.element(0,i,k) = 0 ;
1375  ptable.element(0,i,k) = 0 ;
1376  }
1377  /*
1378  for (T_STATES j=0; j<m_N; j++)
1379  {
1380  CPlifBase * penalty = PEN.element(i,j) ;
1381  int32_t num_current_svms=0;
1382  int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
1383  if (penalty)
1384  {
1385  SG_PRINT("trans %i -> %i \n",i,j)
1386  penalty->get_used_svms(&num_current_svms, svm_ids);
1387  trans_matrix_svms.set_element(svm_ids,i,j);
1388  for (int32_t l=0;l<num_current_svms;l++)
1389  SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l])
1390  trans_matrix_num_svms.set_element(num_current_svms,i,j);
1391  }
1392  }
1393  */
1394 
1395  }
1396  }
1397 
1398  SG_DEBUG("START_RECURSION \n\n")
1399 
1400  // recursion
1401  for (int32_t t=1; t<m_seq_len; t++)
1402  {
1403  //if (is_big && t%(1+(m_seq_len/1000))==1)
1404  // SG_PROGRESS(t, 0, m_seq_len)
1405  //SG_PRINT("%i\n", t)
1406 
1407  for (T_STATES j=0; j<m_N; j++)
1408  {
1409  if (seq.element(j,t)<=-1e20)
1410  { // if we cannot observe the symbol here, then we can omit the rest
1411  for (int16_t k=0; k<nbest; k++)
1412  {
1413  delta.element(delta_array, t, j, k, m_seq_len, m_N) = seq.element(j,t) ;
1414  psi.element(t,j,k) = 0 ;
1415  if (nbest>1)
1416  ktable.element(t,j,k) = 0 ;
1417  ptable.element(t,j,k) = 0 ;
1418  }
1419  }
1420  else
1421  {
1422  const T_STATES num_elem = trans_list_forward_cnt[j] ;
1423  const T_STATES *elem_list = trans_list_forward[j] ;
1424  const float64_t *elem_val = trans_list_forward_val[j] ;
1425  const int32_t *elem_id = trans_list_forward_id[j] ;
1426 
1427  int32_t fixed_list_len = 0 ;
1428  float64_t fixedtempvv_ = CMath::INFTY ;
1429  int32_t fixedtempii_ = 0 ;
1430  bool fixedtemplong = false ;
1431 
1432  for (int32_t i=0; i<num_elem; i++)
1433  {
1434  T_STATES ii = elem_list[i] ;
1435 
1436  const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ;
1437 
1438  /*int32_t look_back = max_look_back ;
1439  if (0)
1440  { // find lookback length
1441  CPlifBase *pen = (CPlifBase*) penalty ;
1442  if (pen!=NULL)
1443  look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
1444  if (look_back>=1e6)
1445  SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen)
1446  ASSERT(look_back<1e6)
1447  } */
1448 
1449  int32_t look_back_ = look_back.element(j, ii) ;
1450 
1451  int32_t orf_from = m_orf_info.element(ii,0) ;
1452  int32_t orf_to = m_orf_info.element(j,1) ;
1453  if((orf_from!=-1)!=(orf_to!=-1))
1454  SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
1455  ASSERT((orf_from!=-1)==(orf_to!=-1))
1456 
1457  int32_t orf_target = -1 ;
1458  if (orf_from!=-1)
1459  {
1460  orf_target=orf_to-orf_from ;
1461  if (orf_target<0)
1462  orf_target+=3 ;
1463  ASSERT(orf_target>=0 && orf_target<3)
1464  }
1465 
1466  int32_t orf_last_pos = m_pos[t] ;
1467 #ifdef DYNPROG_TIMING
1468  MyTime3.start() ;
1469 #endif
1470  int32_t num_ok_pos = 0 ;
1471 
1472  for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--)
1473  {
1474  bool ok ;
1475  //int32_t plen=t-ts;
1476 
1477  /*for (int32_t s=0; s<m_num_svms; s++)
1478  if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) ||
1479  (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6))
1480  {
1481  SG_DEBUG("s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen])
1482  }*/
1483 
1484  if (orf_target==-1)
1485  ok=true ;
1486  else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
1487  ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
1488  else
1489  ok=false ;
1490 
1491  if (ok)
1492  {
1493 
1494  float64_t segment_loss = 0.0 ;
1495  if (with_loss)
1496  {
1497  segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]);
1498  //if (segment_loss!=segment_loss2)
1499  //SG_PRINT("segment_loss:%f segment_loss2:%f\n", segment_loss, segment_loss2)
1500  }
1502  // BEST_PATH_TRANS
1504 
1505  int32_t frame = orf_from;//m_orf_info.element(ii,0);
1506  lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
1507 
1508  float64_t pen_val = 0.0 ;
1509  if (penalty)
1510  {
1511 #ifdef DYNPROG_TIMING_DETAIL
1512  MyTime.start() ;
1513 #endif
1514  pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
1515 
1516 #ifdef DYNPROG_TIMING_DETAIL
1517  MyTime.stop() ;
1518  content_plifs_time += MyTime.time_diff_sec() ;
1519 #endif
1520  }
1521 
1522 #ifdef DYNPROG_TIMING_DETAIL
1523  MyTime.start() ;
1524 #endif
1525  num_ok_pos++ ;
1526 
1527  if (nbest==1)
1528  {
1529  float64_t val = elem_val[i] + pen_val ;
1530  if (with_loss)
1531  val += segment_loss ;
1532 
1533  float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
1534 
1535  if (mval<fixedtempvv_)
1536  {
1537  fixedtempvv_ = mval ;
1538  fixedtempii_ = ii + ts*m_N;
1539  fixed_list_len = 1 ;
1540  fixedtemplong = false ;
1541  }
1542  }
1543  else
1544  {
1545  for (int16_t diff=0; diff<nbest; diff++)
1546  {
1547  float64_t val = elem_val[i] ;
1548  val += pen_val ;
1549  if (with_loss)
1550  val += segment_loss ;
1551 
1552  float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
1553 
1554  /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */
1555  /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/
1556  /* fixed_list_len has the number of elements in fixedtempvv */
1557 
1558  if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
1559  {
1560  if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
1561  {
1562  fixedtempvv[fixed_list_len] = mval ;
1563  fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
1564  fixed_list_len++ ;
1565  }
1566  else // must have mval < fixedtempvv[fixed_list_len-1]
1567  {
1568  int32_t addhere = fixed_list_len;
1569  while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
1570  addhere--;
1571 
1572  // move everything from addhere+1 one forward
1573  for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
1574  {
1575  fixedtempvv[jj] = fixedtempvv[jj-1];
1576  fixedtempii[jj] = fixedtempii[jj-1];
1577  }
1578 
1579  fixedtempvv[addhere] = mval;
1580  fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
1581 
1582  if (fixed_list_len < nbest)
1583  fixed_list_len++;
1584  }
1585  }
1586  }
1587  }
1588 #ifdef DYNPROG_TIMING_DETAIL
1589  MyTime.stop() ;
1590  inner_loop_max_time += MyTime.time_diff_sec() ;
1591 #endif
1592  }
1593  }
1594 #ifdef DYNPROG_TIMING
1595  MyTime3.stop() ;
1596  inner_loop_time += MyTime3.time_diff_sec() ;
1597 #endif
1598  }
1599  for (int32_t i=0; i<num_elem; i++)
1600  {
1601  T_STATES ii = elem_list[i] ;
1602 
1603  const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ;
1604 
1605  /*int32_t look_back = max_look_back ;
1606  if (0)
1607  { // find lookback length
1608  CPlifBase *pen = (CPlifBase*) penalty ;
1609  if (pen!=NULL)
1610  look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
1611  if (look_back>=1e6)
1612  SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen)
1613  ASSERT(look_back<1e6)
1614  } */
1615 
1616  int32_t look_back_ = look_back.element(j, ii) ;
1617  //int32_t look_back_orig_ = look_back_orig.element(j, ii) ;
1618 
1619  int32_t orf_from = m_orf_info.element(ii,0) ;
1620  int32_t orf_to = m_orf_info.element(j,1) ;
1621  if((orf_from!=-1)!=(orf_to!=-1))
1622  SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
1623  ASSERT((orf_from!=-1)==(orf_to!=-1))
1624 
1625  int32_t orf_target = -1 ;
1626  if (orf_from!=-1)
1627  {
1628  orf_target=orf_to-orf_from ;
1629  if (orf_target<0)
1630  orf_target+=3 ;
1631  ASSERT(orf_target>=0 && orf_target<3)
1632  }
1633 
1634  //int32_t loss_last_pos = t ;
1635  //float64_t last_loss = 0.0 ;
1636 
1637 #ifdef DYNPROG_TIMING
1638  MyTime3.start() ;
1639 #endif
1640 
1641  /* long transition stuff */
1642  /* only do this, if
1643  * this feature is enabled
1644  * this is not a transition with ORF restrictions
1645  * the loss is switched off
1646  * nbest=1
1647  */
1648 #ifdef DYNPROG_TIMING
1649  MyTime3.start() ;
1650 #endif
1651  // long transitions, only when not considering ORFs
1652  if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold )
1653  {
1654 
1655  // update table for 5' part of the long segment
1656 
1657  int32_t start = long_transition_content_start.get_element(ii, j) ;
1658  int32_t end_5p_part = start ;
1659  for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++)
1660  {
1661  // find end_5p_part, which is greater than start_5p_part and at least m_long_transition_threshold away
1662  while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold)
1663  end_5p_part++ ;
1664 
1665  ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t)
1666  ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold)
1667 
1668  float64_t pen_val = 0.0;
1669  /* recompute penalty, if necessary */
1670  if (penalty)
1671  {
1672  int32_t frame = m_orf_info.element(ii,0);
1673  lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame); // * t -> end_5p_part
1674  pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ;
1675  }
1676 
1677  /*if (m_pos[start_5p_part]==1003)
1678  {
1679  SG_PRINT("Part1: %i - %i vs %i - %i\n", m_pos[t], m_pos[ts], m_pos[end_5p_part], m_pos[start_5p_part])
1680  SG_PRINT("Part1: ts=%i t=%i start_5p_part=%i m_seq_len=%i\n", m_pos[ts], m_pos[t], m_pos[start_5p_part], m_seq_len)
1681  }*/
1682 
1683  float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ;
1684  //float64_t mval_trans = -( elem_val[i] + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; // enable this for the incomplete extra check
1685 
1686  float64_t segment_loss_part1=0.0 ;
1687  if (with_loss)
1688  { // this is the loss from the start of the long segment (5' part + middle section)
1689 
1690  segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part /*long_transition_content_start_position.get_element(ii,j)*/, end_5p_part, elem_id[i]); // * unsure
1691 
1692  mval_trans -= segment_loss_part1 ;
1693  }
1694 
1695 
1696  if (0)//m_pos[end_5p_part] - m_pos[long_transition_content_start_position.get_element(ii, j)] > look_back_orig_/*m_long_transition_max*/)
1697  {
1698  // this restricts the maximal length of segments,
1699  // but the current implementation is not valid since the
1700  // long transition is discarded without loocking if there
1701  // is a second best long transition in between
1702  long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ;
1703  long_transition_content_start_position.set_element(0, ii, j) ;
1704  if (with_loss)
1705  long_transition_content_scores_loss.set_element(0.0, ii, j) ;
1706 #ifdef DYNPROG_DEBUG
1707  long_transition_content_scores_pen.set_element(0.0, ii, j) ;
1708  long_transition_content_scores_elem.set_element(0.0, ii, j) ;
1709  long_transition_content_scores_prev.set_element(0.0, ii, j) ;
1710  long_transition_content_end_position.set_element(0, ii, j) ;
1711 #endif
1712  }
1713  if (with_loss)
1714  {
1715  float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ;
1716  float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]);
1717  float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ;
1718  long_transition_content_scores.set_element(score, ii, j) ;
1719  long_transition_content_scores_loss.set_element(new_loss, ii, j) ;
1720 #ifdef DYNPROG_DEBUG
1721  long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
1722 #endif
1723 
1724  }
1725  if (-long_transition_content_scores.get_element(ii, j) > mval_trans )
1726  {
1727  /* then the old long transition is either too far away or worse than the current one */
1728  long_transition_content_scores.set_element(-mval_trans, ii, j) ;
1729  long_transition_content_start_position.set_element(start_5p_part, ii, j) ;
1730  if (with_loss)
1731  long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ;
1732 #ifdef DYNPROG_DEBUG
1733  long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ;
1734  long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ;
1735  long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ;
1736  /*ASSERT(fabs(long_transition_content_scores.get_element(ii, j)-(long_transition_content_scores_pen.get_element(ii, j) +
1737  long_transition_content_scores_elem.get_element(ii, j) +
1738  long_transition_content_scores_prev.get_element(ii, j)))<1e-6) ;*/
1739  long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
1740 #endif
1741  }
1742  //
1743  // this sets the position where the search for better 5'parts is started the next time
1744  // whithout this the prediction takes ages
1745  //
1746  long_transition_content_start.set_element(start_5p_part, ii, j) ;
1747  }
1748 
1749  // consider the 3' part at the end of the long segment:
1750  // * with length = m_long_transition_threshold
1751  // * content prediction and loss only for this part
1752 
1753  // find ts > 0 with distance from m_pos[t] greater m_long_transition_threshold
1754  // precompute: only depends on t
1755  int ts = t;
1756  while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
1757  ts-- ;
1758 
1759  if (ts>0)
1760  {
1761  ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold))
1762 
1763 
1764  /* only consider this transition, if the right position was found */
1765  float pen_val_3p = 0.0 ;
1766  if (penalty)
1767  {
1768  int32_t frame = orf_from ; //m_orf_info.element(ii, 0);
1769  lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
1770  pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
1771  }
1772 
1773  float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ;
1774 
1775  {
1776 #ifdef DYNPROG_DEBUG
1777  float64_t segment_loss_part2=0.0 ;
1778  float64_t segment_loss_part1=0.0 ;
1779 #endif
1780  float64_t segment_loss_total=0.0 ;
1781 
1782  if (with_loss)
1783  { // this is the loss for the 3' end fragment of the segment
1784  // (the 5' end and the middle section loss is already contained in mval)
1785 
1786 #ifdef DYNPROG_DEBUG
1787  // this is an alternative, which should be identical, if the loss is additive
1788  segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
1789  //mval -= segment_loss_part2 ;
1790  segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
1791 #endif
1792  segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
1793  mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
1794  }
1795 
1796 #ifdef DYNPROG_DEBUG
1797  if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561)
1798  {
1799  SG_PRINT("Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i) scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n",
1800  m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1],
1801  long_transition_content_scores.get_element(ii, j),
1802  long_transition_content_scores_pen.get_element(ii, j),
1803  long_transition_content_scores_prev.get_element(ii, j),
1804  long_transition_content_scores_elem.get_element(ii, j),
1805  long_transition_content_scores_loss.get_element(ii, j),
1806  m_pos[long_transition_content_start_position.get_element(ii,j)],
1807  m_pos[long_transition_content_end_position.get_element(ii,j)],
1808  m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
1809  SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] )
1810  }
1811 
1812  if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
1813  {
1814  SG_ERROR("LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n",
1815  segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t],
1816  long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
1817  segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t],
1818  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j),
1819  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
1820  }
1821 #endif
1822  }
1823 
1824  // prefer simpler version to guarantee optimality
1825  //
1826  // original:
1827  /* if ((mval < fixedtempvv_) &&
1828  (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_) */
1829  if (mval < fixedtempvv_)
1830  {
1831  /* then the long transition is better than the short one => replace it */
1832  int32_t fromtjk = fixedtempii_ ;
1833  /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n",
1834  m_pos[t], j,
1835  mval, pen_val_3p*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii,
1836  m_pos[long_transition_content_position.get_element(ii, j)],
1837  fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/
1838  ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong)
1839 
1840  fixedtempvv_ = mval ;
1841  fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ;
1842  fixed_list_len = 1 ;
1843  fixedtemplong = true ;
1844  }
1845  }
1846  }
1847  }
1848 #ifdef DYNPROG_TIMING
1849  MyTime3.stop() ;
1850  long_transition_time += MyTime3.time_diff_sec() ;
1851 #endif
1852 
1853 
1854  int32_t numEnt = fixed_list_len;
1855 
1856  float64_t minusscore;
1857  int64_t fromtjk;
1858 
1859  for (int16_t k=0; k<nbest; k++)
1860  {
1861  if (k<numEnt)
1862  {
1863  if (nbest==1)
1864  {
1865  minusscore = fixedtempvv_ ;
1866  fromtjk = fixedtempii_ ;
1867  }
1868  else
1869  {
1870  minusscore = fixedtempvv[k];
1871  fromtjk = fixedtempii[k];
1872  }
1873 
1874  delta.element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.element(j,t);
1875  psi.element(t,j,k) = (fromtjk%m_N) ;
1876  if (nbest>1)
1877  ktable.element(t,j,k) = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ;
1878  ptable.element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
1879  }
1880  else
1881  {
1882  delta.element(delta_array, t, j, k, m_seq_len, m_N) = -CMath::INFTY ;
1883  psi.element(t,j,k) = 0 ;
1884  if (nbest>1)
1885  ktable.element(t,j,k) = 0 ;
1886  ptable.element(t,j,k) = 0 ;
1887  }
1888  }
1889  }
1890  }
1891  }
1892  { //termination
1893  int32_t list_len = 0 ;
1894  for (int16_t diff=0; diff<nbest; diff++)
1895  {
1896  for (T_STATES i=0; i<m_N; i++)
1897  {
1898  oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ;
1899  oldtempii[list_len] = i + diff*m_N ;
1900  list_len++ ;
1901  }
1902  }
1903 
1904  CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
1905 
1906  for (int16_t k=0; k<nbest; k++)
1907  {
1908  delta_end.element(k) = -oldtempvv[k] ;
1909  path_ends.element(k) = (oldtempii[k]%m_N) ;
1910  if (nbest>1)
1911  ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ;
1912  }
1913 
1914 
1915  }
1916 
1917  { //state sequence backtracking
1918  for (int16_t k=0; k<nbest; k++)
1919  {
1920  prob_nbest[k]= delta_end.element(k) ;
1921 
1922  int32_t i = 0 ;
1923  state_seq[i] = path_ends.element(k) ;
1924  int16_t q = 0 ;
1925  if (nbest>1)
1926  q=ktable_end.element(k) ;
1927  pos_seq[i] = m_seq_len-1 ;
1928 
1929  while (pos_seq[i]>0)
1930  {
1931  ASSERT(i+1<m_seq_len)
1932  //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q)
1933  state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
1934  pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
1935  if (nbest>1)
1936  q = ktable.element(pos_seq[i], state_seq[i], q) ;
1937  i++ ;
1938  }
1939  //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q)
1940  int32_t num_states = i+1 ;
1941  for (i=0; i<num_states;i++)
1942  {
1943  my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ;
1944  my_pos_seq[i+k*m_seq_len] = pos_seq[num_states-i-1] ;
1945  }
1946  if (num_states<m_seq_len)
1947  {
1948  my_state_seq[num_states+k*m_seq_len]=-1 ;
1949  my_pos_seq[num_states+k*m_seq_len]=-1 ;
1950  }
1951  }
1952  }
1953 
1954  //if (is_big)
1955  // SG_PRINT("DONE. \n")
1956 
1957 
1958 #ifdef DYNPROG_TIMING
1959  MyTime2.stop() ;
1960 
1961  //if (is_big)
1962  SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec())
1963 #endif
1964 
1965  SG_FREE(fixedtempvv);
1966  SG_FREE(fixedtempii);
1967  }
1968 
1969 
1971  int32_t *my_state_seq, int32_t *my_pos_seq,
1972  int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
1973 {
1977  //m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
1978  //m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
1979  m_my_scores.resize_array(my_seq_len);
1980  m_my_losses.resize_array(my_seq_len);
1981  float64_t* my_scores=m_my_scores.get_array();
1982  float64_t* my_losses=m_my_losses.get_array();
1983  CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
1984  CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
1985 
1986  if (!m_svm_arrays_clean)
1987  {
1988  SG_ERROR("SVM arrays not clean")
1989  return ;
1990  } ;
1991  //SG_PRINT("genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num)
1992  //m_mod_words.display() ;
1993  //m_sign_words.display() ;
1994  //m_string_words.display() ;
1995 
1996  bool use_svm = false ;
1997 
1998  CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase*
1999 
2000  CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d, CPlifBase*
2001 
2002  CDynamicArray<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
2003 
2004  { // determine whether to use svm outputs and clear derivatives
2005  for (int32_t i=0; i<m_N; i++)
2006  for (int32_t j=0; j<m_N; j++)
2007  {
2008  CPlifBase* penij=(CPlifBase*) PEN.element(i,j) ;
2009  if (penij==NULL)
2010  continue ;
2011 
2012  if (penij->uses_svm_values())
2013  use_svm=true ;
2014  penij->penalty_clear_derivative() ;
2015  }
2016  for (int32_t i=0; i<m_N; i++)
2017  for (int32_t j=0; j<max_num_signals; j++)
2018  {
2019  CPlifBase* penij=(CPlifBase*) PEN_state_signals.element(i,j) ;
2020  if (penij==NULL)
2021  continue ;
2022  if (penij->uses_svm_values())
2023  use_svm=true ;
2024  penij->penalty_clear_derivative() ;
2025  }
2026  }
2027 
2028  { // set derivatives of p, q and a to zero
2029 
2030  for (int32_t i=0; i<m_N; i++)
2031  {
2034  for (int32_t j=0; j<m_N; j++)
2036  }
2037  }
2038 
2039  { // clear score vector
2040  for (int32_t i=0; i<my_seq_len; i++)
2041  {
2042  my_scores[i]=0.0 ;
2043  my_losses[i]=0.0 ;
2044  }
2045  }
2046 
2047  //int32_t total_len = 0 ;
2048 
2049  //m_transition_matrix_a.display_array() ;
2050  //m_transition_matrix_a_id.display_array() ;
2051 
2052  // compute derivatives for given path
2057  {
2058  svm_value[s]=0 ;
2059  svm_value_part1[s]=0 ;
2060  svm_value_part2[s]=0 ;
2061  }
2062 
2063  //#ifdef DYNPROG_DEBUG
2064  float64_t total_score = 0.0 ;
2065  float64_t total_loss = 0.0 ;
2066  //#endif
2067 
2068  ASSERT(my_state_seq[0]>=0)
2069  m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
2070  my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ;
2071 
2072  ASSERT(my_state_seq[my_seq_len-1]>=0)
2073  m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
2074  my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
2075 
2076  //#ifdef DYNPROG_DEBUG
2077  total_score += my_scores[0] + my_scores[my_seq_len-1] ;
2078  //#endif
2079 
2080  SG_DEBUG("m_seq_len=%i\n", my_seq_len)
2081  for (int32_t i=0; i<my_seq_len-1; i++)
2082  {
2083  if (my_state_seq[i+1]==-1)
2084  break ;
2085  int32_t from_state = my_state_seq[i] ;
2086  int32_t to_state = my_state_seq[i+1] ;
2087  int32_t from_pos = my_pos_seq[i] ;
2088  int32_t to_pos = my_pos_seq[i+1] ;
2089 
2090  int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ;
2091  my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id);
2092 
2093 #ifdef DYNPROG_DEBUG
2094 
2095 
2096  if (i>0)// test if segment loss is additive
2097  {
2098  float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id);
2099  float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id);
2100  float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id);
2101  SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3)
2102  if (CMath::abs(loss1+loss2-loss3)>0)
2103  {
2104  SG_PRINT("%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
2105  }
2106  }
2107  io->set_loglevel(M_DEBUG) ;
2108  SG_DEBUG("%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
2109 #endif
2110  // increase usage of this transition
2111  m_transition_matrix_a_deriv.element(from_state, to_state)++ ;
2112  my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ;
2113  //SG_PRINT("m_transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, m_transition_matrix_a.element(from_state, to_state))
2114 #ifdef DYNPROG_DEBUG
2115  SG_DEBUG("%i. scores[i]=%f\n", i, my_scores[i])
2116 #endif
2117 
2118  /*int32_t last_svm_pos[m_num_degrees] ;
2119  for (int32_t qq=0; qq<m_num_degrees; qq++)
2120  last_svm_pos[qq]=-1 ;*/
2121 
2122  bool is_long_transition = false ;
2123  if (m_long_transitions)
2124  {
2125  if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold)
2126  is_long_transition = true ;
2127  if (m_orf_info.element(from_state,0)!=-1)
2128  is_long_transition = false ;
2129  }
2130 
2131  int32_t from_pos_thresh = from_pos ;
2132  int32_t to_pos_thresh = to_pos ;
2133 
2134  if (use_svm)
2135  {
2136  if (is_long_transition)
2137  {
2138 
2139  while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold) // *
2140  from_pos_thresh++ ;
2141  ASSERT(from_pos_thresh<to_pos)
2142  ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold) // *
2143  ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold)// *
2144 
2145  int32_t frame = m_orf_info.element(from_state,0);
2146  lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame);
2147 
2148 #ifdef DYNPROG_DEBUG
2149  SG_PRINT("part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1])
2151  SG_PRINT("%1.4f ", svm_value_part1[s])
2152  SG_PRINT("\n")
2153 #endif
2154 
2155  while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold) // *
2156  to_pos_thresh-- ;
2157  ASSERT(to_pos_thresh>0)
2158  ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) // *
2159  ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) // *
2160 
2161  lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame);
2162 
2163 #ifdef DYNPROG_DEBUG
2164  SG_PRINT("part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1])
2166  SG_PRINT("%1.4f ", svm_value_part2[s])
2167  SG_PRINT("\n")
2168 #endif
2169  }
2170  else
2171  {
2172  /* normal case */
2173 
2174  //SG_PRINT("from_pos: %i; to_pos: %i; m_pos[to_pos]-m_pos[from_pos]: %i \n",from_pos, to_pos, m_pos[to_pos]-m_pos[from_pos])
2175  int32_t frame = m_orf_info.element(from_state,0);
2176  if (false)//(frame>=0)
2177  {
2178  int32_t num_current_svms=0;
2179  int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
2180  SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame)
2181  ((CPlifBase*) PEN.element(to_state, from_state))->get_used_svms(&num_current_svms, svm_ids);
2182  SG_PRINT("\n")
2183  }
2184 
2185  lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame);
2186 #ifdef DYNPROG_DEBUG
2187  SG_PRINT("part2: pos1: %i pos2: %i \nsvm_values: ", m_pos[from_pos], m_pos[to_pos])
2189  SG_PRINT("%1.4f ", svm_value[s])
2190  SG_PRINT("\n")
2191 #endif
2192  }
2193  }
2194 
2195  if (PEN.element(to_state, from_state)!=NULL)
2196  {
2197  float64_t nscore = 0 ;
2198  if (is_long_transition)
2199  {
2200  float64_t pen_value_part1 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ;
2201  float64_t pen_value_part2 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ;
2202  nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
2203  }
2204  else
2205  nscore = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ;
2206 
2207  if (false)//(nscore<-1e9)
2208  SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n",
2209  is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ;
2210 
2211  my_scores[i] += nscore ;
2212 
2213  for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/
2214  {
2215  svm_value[s]=-CMath::INFTY;
2216  svm_value_part1[s]=-CMath::INFTY;
2217  svm_value_part2[s]=-CMath::INFTY;
2218  }
2219 
2220 #ifdef DYNPROG_DEBUG
2221  //SG_DEBUG("%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, m_pos[from_pos], to_pos, m_pos[to_pos], m_pos[to_pos]-m_pos[from_pos])
2222 #endif
2223  if (is_long_transition)
2224  {
2225 #ifdef DYNPROG_DEBUG
2226  float64_t sum_score = 0.0 ;
2227 
2228  for (int kk=0; kk<i; kk++)
2229  sum_score += my_scores[i] ;
2230 
2231  SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n",
2232  is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state,
2233  nscore, sum_score,
2234  PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh],
2235  PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
2236 #endif
2237  }
2238 
2239  if (is_long_transition)
2240  {
2241  ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ;
2242  ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
2243  }
2244  else
2245  ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ;
2246 
2247  //SG_PRINT("m_num_raw_data = %i \n", m_num_raw_data)
2248 
2249  // for tiling array and rna-seq data every single measurement must be added to the derivative
2250  // in contrast to the content svm predictions where we have a single value per transition;
2251  // content svm predictions have already been added to the derivative, thus we start with d=1
2252  // instead of d=0
2253  if (is_long_transition)
2254  {
2255  for (int32_t d=1; d<=m_num_raw_data; d++)
2256  {
2257  for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
2258  svm_value[s]=-CMath::INFTY;
2259  float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
2260  int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d);
2261  for (int32_t k=0;k<num_intensities;k++)
2262  {
2263  for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2264  svm_value[j]=intensities[k];
2265 
2266  ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
2267 
2268  }
2269  num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d);
2270  for (int32_t k=0;k<num_intensities;k++)
2271  {
2272  for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2273  svm_value[j]=intensities[k];
2274 
2275  ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
2276 
2277  }
2278  SG_FREE(intensities);
2279 
2280  }
2281  }
2282  else
2283  {
2284  for (int32_t d=1; d<=m_num_raw_data; d++)
2285  {
2286  for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
2287  svm_value[s]=-CMath::INFTY;
2288  float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
2289  int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d);
2290  //SG_PRINT("m_pos[from_pos]:%i, m_pos[to_pos]:%i, num_intensities:%i\n",m_pos[from_pos],m_pos[to_pos], num_intensities)
2291  for (int32_t k=0;k<num_intensities;k++)
2292  {
2293  for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2294  svm_value[j]=intensities[k];
2295 
2296  ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ;
2297 
2298  }
2299  SG_FREE(intensities);
2300  }
2301  }
2302 
2303  }
2304 #ifdef DYNPROG_DEBUG
2305  SG_DEBUG("%i. scores[i]=%f\n", i, my_scores[i])
2306 #endif
2307 
2308  //SG_DEBUG("emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0)
2309  for (int32_t k=0; k<max_num_signals; k++)
2310  {
2311  if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
2312  {
2313 #ifdef DYNPROG_DEBUG
2314  SG_DEBUG("%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k))
2315 #endif
2316  my_scores[i] += seq_input.element(to_state, to_pos, k) ;
2317  //if (seq_input.element(to_state, to_pos, k) !=0)
2318  // SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k))
2319  break ;
2320  }
2321  if (PEN_state_signals.element(to_state, k)!=NULL)
2322  {
2323  float64_t nscore = ((CPlifBase*) PEN_state_signals.element(to_state,k))->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ; // this should be ok for long_transitions (svm_value does not matter)
2324  my_scores[i] += nscore ;
2325 #ifdef DYNPROG_DEBUG
2326  if (false)//(nscore<-1e9)
2327  {
2328  SG_PRINT("is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n",
2329  is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ;
2330  for (int x=0; x<23; x++)
2331  {
2332  for (int i=-10; i<10; i++)
2333  SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k))
2334  SG_PRINT("\n")
2335  }
2336 
2337  }
2338 #endif
2339  //break ;
2340  //int32_t num_current_svms=0;
2341  //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
2342  //SG_PRINT("PEN_state_signals->id: ")
2343  //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids);
2344  //SG_PRINT("\n")
2345  //if (nscore != 0)
2346  //SG_PRINT("%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k)
2347 #ifdef DYNPROG_DEBUG
2348  SG_DEBUG("%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k)
2349 #endif
2350  ((CPlifBase*) PEN_state_signals.element(to_state,k))->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ; // this should be ok for long_transitions (svm_value does not matter)
2351  } else
2352  break ;
2353  }
2354 
2355  //#ifdef DYNPROG_DEBUG
2356  //SG_PRINT("scores[%i]=%f (final) \n", i, my_scores[i])
2357  //SG_PRINT("losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss)
2358  total_score += my_scores[i] ;
2359  total_loss += my_losses[i] ;
2360  //#endif
2361  }
2362  //#ifdef DYNPROG_DEBUG
2363  //SG_PRINT("total score = %f \n", total_score)
2364  //SG_PRINT("total loss = %f \n", total_loss)
2365  //#endif
2366  SG_FREE(svm_value);
2367  SG_FREE(svm_value_part1);
2368  SG_FREE(svm_value_part2);
2369 }
2370 
2371 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type)
2372 {
2373  ASSERT(from_pos<to_pos)
2374  int32_t num_intensities = 0;
2375  int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[type-1]];
2376  float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]];
2377  int32_t last_pos;
2378  int32_t num = m_num_probes_cum[type-1];
2379  while (*p_tiling_pos<to_pos)
2380  {
2381  if (*p_tiling_pos>=from_pos)
2382  {
2383  intensities[num_intensities] = *p_tiling_data;
2384  num_intensities++;
2385  }
2386  num++;
2387  if (num>=m_num_probes_cum[type])
2388  break;
2389  last_pos = *p_tiling_pos;
2390  p_tiling_pos++;
2391  p_tiling_data++;
2392  ASSERT(last_pos<*p_tiling_pos)
2393  }
2394  return num_intensities;
2395 }
2396 
2397 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame)
2398 {
2399 #ifdef DYNPROG_TIMING_DETAIL
2400  MyTime.start() ;
2401 #endif
2402 // ASSERT(from_state<to_state)
2403 // if (!(from_pos<to_pos))
2404 // SG_ERROR("from_pos!<to_pos, from_pos: %i to_pos: %i \n",from_pos,to_pos)
2405  for (int32_t i=0;i<m_num_svms;i++)
2406  {
2407  float64_t to_val = m_lin_feat.get_element(i, to_state);
2408  float64_t from_val = m_lin_feat.get_element(i, from_state);
2409  svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
2410  }
2411  for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++)
2412  {
2413  float64_t to_val = m_lin_feat.get_element(i, to_state);
2414  float64_t from_val = m_lin_feat.get_element(i, from_state);
2415  svm_values[i] = to_val-from_val ;
2416  }
2417  if (m_intron_list)
2418  {
2419  int32_t* support = SG_MALLOC(int32_t, m_num_intron_plifs);
2420  m_intron_list->get_intron_support(support, from_state, to_state);
2421  int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data];
2422  int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;
2423  int32_t cnt = 0;
2424  for (int32_t i=intron_list_start; i<intron_list_end;i++)
2425  {
2426  svm_values[i] = (float64_t) (support[cnt]);
2427  cnt++;
2428  }
2429  //if (to_pos>3990 && to_pos<4010)
2430  // SG_PRINT("from_state:%i to_state:%i support[0]:%i support[1]:%i\n",from_state, to_state, support[0], support[1])
2431  SG_FREE(support);
2432  }
2433  // find the correct row with precomputed frame predictions
2434  if (frame!=-1)
2435  {
2436  svm_values[frame_plifs[0]] = 1e10;
2437  svm_values[frame_plifs[1]] = 1e10;
2438  svm_values[frame_plifs[2]] = 1e10;
2439  int32_t global_frame = from_pos%3;
2440  int32_t row = ((global_frame+frame)%3)+4;
2441  float64_t to_val = m_lin_feat.get_element(row, to_state);
2442  float64_t from_val = m_lin_feat.get_element(row, from_state);
2443  svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
2444  }
2445 #ifdef DYNPROG_TIMING_DETAIL
2446  MyTime.stop() ;
2447  content_svm_values_time += MyTime.time_diff_sec() ;
2448 #endif
2449 }
2450 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs)
2451 {
2452  m_intron_list = intron_list;
2453  m_num_intron_plifs = num_plifs;
2454 }
2455 
virtual float64_t get_max_value() const =0
CDynamicArray< float64_t > m_end_state_distribution_q_deriv
Definition: DynProg.h:616
void set_loglevel(EMessageType level)
Definition: SGIO.cpp:320
bool m_svm_arrays_clean
Definition: DynProg.h:653
CDynamicArray< float64_t > m_segment_loss
Definition: DynProg.h:690
CPlifMatrix * m_plif_matrices
Definition: DynProg.h:721
CDynamicArray< int32_t > m_positions
Definition: DynProg.h:714
void best_path_trans_deriv(int32_t *my_state_seq, int32_t *my_pos_seq, int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
Definition: DynProg.cpp:1970
uint16_t *** m_wordstr
Definition: DynProg.h:686
CDynamicArray< float64_t > m_dict_weights
Definition: DynProg.h:688
static int is_finite(double f)
checks whether a float is finite
Definition: Math.cpp:259
static int32_t cum_num_words_default[5]
Definition: DynProg.h:771
virtual ~CDynProg()
Definition: DynProg.cpp:124
CDynamicArray< int32_t > m_segment_ids
Definition: DynProg.h:692
void set_dict_weights(SGMatrix< float64_t > dictionary_weights)
Definition: DynProg.cpp:759
void set_my_state_seq(int32_t *my_state_seq)
Definition: DynProg.cpp:743
static float64_t ceil(float64_t d)
Definition: Math.h:411
void set_plif_matrices(CPlifMatrix *pm)
Definition: DynProg.cpp:726
static const float64_t INFTY
infinity
Definition: Math.h:2069
void create_word_string()
Definition: DynProg.cpp:342
int32_t m_N
number of states
Definition: DynProg.h:603
static void nmin(float64_t *output, T *index, int32_t size, int32_t n)
Definition: Math.h:2312
void set_const(const T &const_element)
Definition: DynamicArray.h:396
void set_observation_matrix(SGNDArray< float64_t > seq)
Definition: DynProg.cpp:638
bool m_long_transitions
Definition: DynProg.h:753
int32_t m_max_a_id
Definition: DynProg.h:655
void set_gene_string(SGVector< char > genestr)
Definition: DynProg.cpp:735
void set_orf_info(SGMatrix< int32_t > orf_info)
Definition: DynProg.cpp:704
void get_path_scores(float64_t **my_scores, int32_t *seq_len)
Definition: DynProg.cpp:841
bool check_svm_arrays()
Definition: DynProg.cpp:582
CPlifBase ** get_plif_matrix()
Definition: PlifMatrix.h:54
CDynamicArray< int32_t > m_transition_matrix_a_id
transition matrix
Definition: DynProg.h:606
float32_t get_segment_loss(int32_t from_pos, int32_t to_pos, int32_t segment_id)
Definition: SegmentLoss.h:113
static void translate_from_single_order(ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
Definition: Alphabet.cpp:760
CDynamicArray< int32_t > m_word_degree
Definition: DynProg.h:626
int32_t * m_cum_num_words_array
Definition: DynProg.h:630
SGVector< float64_t > get_scores()
Definition: DynProg.cpp:813
T * get_array() const
Definition: DynamicArray.h:408
float64_t get_q(T_STATES offset) const
Definition: DynProg.h:364
#define SG_ERROR(...)
Definition: SGIO.h:128
int32_t * m_mod_words_array
Definition: DynProg.h:638
int32_t get_num_svms()
Definition: DynProg.cpp:173
index_t num_cols
Definition: SGMatrix.h:465
CDynamicArray< float64_t > m_lin_feat
Definition: DynProg.h:739
void compute_loss(int32_t *all_pos, int32_t len)
Definition: SegmentLoss.cpp:47
class IntronList
Definition: SegmentLoss.h:24
CPlifBase ** get_state_signals()
Definition: PlifMatrix.h:68
float64_t get_p(T_STATES offset) const
Definition: DynProg.h:384
int32_t raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t *intensities, int32_t type)
Definition: DynProg.cpp:2371
int32_t m_num_intron_plifs
Definition: DynProg.h:733
void best_path_set_segment_ids_mask(int32_t *segment_ids, float64_t *segment_mask, int32_t m)
Definition: DynProg.cpp:795
void set_pos(SGVector< int32_t > pos)
Definition: DynProg.cpp:698
virtual float64_t lookup_penalty(float64_t p_value, float64_t *svm_values) const =0
int32_t m_seq_len
Definition: DynProg.h:663
CDynamicArray< float64_t > m_initial_state_distribution_p_deriv
Definition: DynProg.h:612
int32_t get_num_positions()
Definition: DynProg.cpp:660
CDynProg(int32_t p_num_svms=8)
Definition: DynProg.cpp:46
int32_t m_num_degrees
Definition: DynProg.h:621
CSparseFeatures< float64_t > * m_seq_sparse1
Definition: DynProg.h:717
CSparseFeatures< float64_t > * m_seq_sparse2
Definition: DynProg.h:719
#define SG_REF(x)
Definition: SGObject.h:52
class Plif
Definition: Plif.h:40
index_t num_rows
Definition: SGMatrix.h:463
int32_t * m_num_probes_cum
Definition: DynProg.h:746
float32_t get_segment_loss_extend(int32_t from_pos, int32_t to_pos, int32_t segment_id)
Definition: SegmentLoss.h:151
void precompute_content_values()
Definition: DynProg.cpp:373
int32_t * m_num_lin_feat_plifs_cum
Definition: DynProg.h:748
CDynamicArray< float64_t > m_initial_state_distribution_p
initial distribution of states
Definition: DynProg.h:611
CDynamicArray< int32_t > m_mod_words
Definition: DynProg.h:636
class IntronList
Definition: IntronList.h:22
ST get_feature(int32_t num, int32_t index)
void init_mod_words_array(SGMatrix< int32_t > p_mod_words_array)
Definition: DynProg.cpp:560
void set_a(SGMatrix< float64_t > a)
Definition: DynProg.cpp:438
index_t vlen
Definition: SGVector.h:545
#define SG_PRINT(...)
Definition: SGIO.h:136
CDynamicArray< float64_t > m_transition_matrix_a_deriv
Definition: DynProg.h:608
void set_intron_list(CIntronList *intron_list, int32_t num_plifs)
Definition: DynProg.cpp:2450
#define ASSERT(x)
Definition: SGIO.h:200
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:125
bool set_element(T e, int32_t idx1, int32_t idx2=0, int32_t idx3=0)
Definition: DynamicArray.h:306
void set_q_vector(SGVector< float64_t > q)
Definition: DynProg.cpp:431
CDynamicArray< int32_t > m_num_unique_words
Definition: DynProg.h:651
CSGObject * element(int32_t idx1, int32_t idx2=0, int32_t idx3=0)
CDynamicArray< float64_t > m_transition_matrix_a
Definition: DynProg.h:607
static int32_t mod_words_default[32]
Definition: DynProg.h:782
void resize_lin_feat(int32_t num_new_feat)
Definition: DynProg.cpp:260
void set_segment_ids(CDynamicArray< int32_t > *segment_ids)
Definition: SegmentLoss.cpp:37
void precompute_stop_codons()
Definition: DynProg.cpp:178
CDynamicArray< bool > m_genestr_stop
Definition: DynProg.h:726
void lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t *svm_values, int32_t frame)
Definition: DynProg.cpp:2397
CDynamicArray< int32_t > m_orf_info
Definition: DynProg.h:665
double float64_t
Definition: common.h:60
SGMatrix< int32_t > get_positions()
Definition: DynProg.cpp:831
index_t num_dims
Definition: SGNDArray.h:180
int32_t * m_string_words_array
Definition: DynProg.h:646
virtual bool uses_svm_values() const =0
CDynamicArray< int32_t > m_my_pos_seq
Definition: DynProg.h:698
SGMatrix< int32_t > get_states()
Definition: DynProg.cpp:821
CDynamicArray< int32_t > m_states
Definition: DynProg.h:712
static int32_t frame_plifs[3]
Definition: DynProg.h:775
CDynamicArray< int32_t > m_cum_num_words
Definition: DynProg.h:628
void best_path_set_segment_loss(SGMatrix< float64_t > segment_loss)
Definition: DynProg.cpp:778
CDynamicArray< int32_t > m_string_words
Definition: DynProg.h:644
CDynamicArray< int32_t > m_pos
Definition: DynProg.h:661
void set_content_type_array(SGMatrix< float64_t > seg_path)
Definition: DynProg.cpp:665
int32_t get_use_svm() const
Definition: Plif.h:191
static T max(T a, T b)
Definition: Math.h:164
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an a...
void precompute_tiling_plifs(CPlif **PEN, const int32_t *tiling_plif_ids, const int32_t num_tiling_plifs)
Definition: DynProg.cpp:293
int32_t m_long_transition_threshold
Definition: DynProg.h:756
CDynamicArray< float64_t > m_end_state_distribution_q
distribution of end-states
Definition: DynProg.h:615
bool resize_array(int32_t ndim1, int32_t ndim2=1, int32_t ndim3=1)
Definition: DynamicArray.h:387
class PlifBase
Definition: PlifBase.h:23
void set_a_trans_matrix(SGMatrix< float64_t > a_trans)
Definition: DynProg.cpp:459
float64_t * m_raw_intensities
Definition: DynProg.h:742
CDynamicArray< float64_t > m_my_losses
Definition: DynProg.h:702
CDynamicArray< float64_t > m_segment_mask
Definition: DynProg.h:694
float float32_t
Definition: common.h:59
CDynamicArray< char > m_genestr
Definition: DynProg.h:671
void set_my_pos_seq(int32_t *my_pos_seq)
Definition: DynProg.cpp:751
CDynamicArray< float64_t > m_scores
Definition: DynProg.h:710
uint8_t T_STATES
Definition: HMM.h:62
void set_array(T *p_array, int32_t p_num_elements, int32_t array_size)
Definition: DynamicArray.h:419
#define SG_UNREF(x)
Definition: SGObject.h:53
#define SG_DEBUG(...)
Definition: SGIO.h:106
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CDynamicArray< float64_t > m_observation_matrix
Definition: DynProg.h:659
CDynamicArray< bool > m_sign_words
Definition: DynProg.h:640
void get_intron_support(int32_t *values, int32_t from_pos, int32_t to_pos)
Definition: IntronList.cpp:102
CDynamicArray< int32_t > m_num_words
Definition: DynProg.h:632
index_t * dims
Definition: SGNDArray.h:177
int32_t get_num_states()
Definition: DynProg.cpp:215
static T min(T a, T b)
Definition: Math.h:153
CDynamicArray< float64_t > m_my_scores
Definition: DynProg.h:700
int32_t * m_probe_pos
Definition: DynProg.h:744
void set_p_vector(SGVector< float64_t > p)
Definition: DynProg.cpp:423
float64_t lookup_penalty(float64_t p_value, float64_t *svm_values) const
Definition: Plif.cpp:205
void get_path_losses(float64_t **my_losses, int32_t *seq_len)
Definition: DynProg.cpp:855
CIntronList * m_intron_list
Definition: DynProg.h:730
const T & element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const
Definition: DynamicArray.h:224
int32_t m_num_raw_data
Definition: DynProg.h:750
void init_tiling_data(int32_t *probe_pos, float64_t *intensities, const int32_t num_probes)
Definition: DynProg.cpp:220
#define SG_WARNING(...)
Definition: SGIO.h:127
virtual void penalty_clear_derivative()=0
int32_t m_num_svms
Definition: DynProg.h:623
static bool sign_words_default[16]
Definition: DynProg.h:785
static int32_t word_degree_default[4]
Definition: DynProg.h:766
void set_num_states(int32_t N)
Definition: DynProg.cpp:200
CSegmentLoss * m_seg_loss_obj
Definition: DynProg.h:706
bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to)
Definition: DynProg.cpp:871
static int32_t string_words_default[16]
Definition: DynProg.h:788
static int32_t num_words_default[4]
Definition: DynProg.h:779
CDynamicArray< int32_t > m_my_state_seq
Definition: DynProg.h:696
void set_segment_mask(CDynamicArray< float64_t > *segment_mask)
Definition: SegmentLoss.cpp:42
void init_content_svm_value_array(const int32_t p_num_svms)
Definition: DynProg.cpp:250
const T & get_element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const
Definition: DynamicArray.h:212
void compute_nbest_paths(int32_t max_num_signals, bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences)
Definition: DynProg.cpp:922
store plif arrays for all transitions in the model
Definition: PlifMatrix.h:31
void set_do_calc(bool b)
Definition: Plif.cpp:403
void set_sparse_features(CSparseFeatures< float64_t > *seq_sparse1, CSparseFeatures< float64_t > *seq_sparse2)
Definition: DynProg.cpp:712
static T abs(T a)
Definition: Math.h:175
void set_a_id(SGMatrix< int32_t > a)
Definition: DynProg.cpp:446

SHOGUN Machine Learning Toolbox - Documentation