SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DistantSegmentsKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Heiko Strathmann
8  * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3
9  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
10  */
11 
13 #include <string>
14 
15 using namespace shogun;
16 
18  m_delta(0), m_theta(0)
19 {
20  init();
21 }
22 
24  int32_t theta) : CStringKernel<char>(size), m_delta(delta),
25  m_theta(theta)
26 {
27  init();
28 }
29 
31  CStringFeatures<char>* r, int32_t size, int32_t delta, int32_t theta) :
32  CStringKernel<char>(size), m_delta(delta), m_theta(theta)
33 {
34  init();
36 }
37 
38 bool CDistantSegmentsKernel::init(CFeatures* l, CFeatures* r)
39 {
40  CKernel::init(l, r);
41  return init_normalizer();
42 }
43 
44 void CDistantSegmentsKernel::init()
45 {
46  SG_ADD(&m_delta, "delta", "Delta parameter of the DS-Kernel", MS_AVAILABLE);
47  SG_ADD(&m_theta, "theta", "Theta parameter of the DS-Kernel", MS_AVAILABLE);
48 }
49 
50 float64_t CDistantSegmentsKernel::compute(int32_t idx_a, int32_t idx_b)
51 {
52  bool free_a, free_b;
53  int32_t aLength=0, bLength=0;
54  char* a=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, aLength,
55  free_a);
56  char* b=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, bLength,
57  free_b);
58  ASSERT(a && b)
59 
60  if ((aLength<1)||(bLength<1))
61  SG_ERROR("Empty sequences")
62 
63  float64_t result=compute(a, aLength, b, bLength, m_delta, m_theta);
64 
65  ((CStringFeatures<char>*) lhs)->free_feature_vector(a, idx_a, free_a);
66  ((CStringFeatures<char>*) rhs)->free_feature_vector(b, idx_b, free_b);
67 
68  return result;
69 }
70 
71 int32_t CDistantSegmentsKernel::bin(int32_t j, int32_t i)
72 {
73  if (i>j)
74  return 0;
75  if (i==3 && j>=3)
76  {
77  return j*(j-1)*(j-2)/6;
78  }
79  else if (i==2 && j>=2)
80  {
81  return j*(j-1)/2;
82  }
83  return 0;
84 }
85 
86 int32_t CDistantSegmentsKernel::compute(char* s, int32_t sLength, char* t,
87  int32_t tLength, int32_t delta_m, int32_t theta_m)
88 {
89  int32_t c=0;
90  int32_t* i_=SG_MALLOC(int32_t, delta_m+1);
91  int32_t* l_=SG_MALLOC(int32_t, delta_m+1);
92  for (int32_t j_s=0; j_s<=(int32_t) sLength-1; j_s++)
93  {
94  for (int32_t j_t=0; j_t<=(int32_t) tLength-1; j_t++)
95  {
96  if (s[j_s-1+1]==t[j_t-1+1])
97  {
98  int32_t n=CMath::min(CMath::min(sLength-j_s, tLength-j_t), delta_m);
99  int32_t k=-1;
100  int32_t i=1;
101  while (i<=n)
102  {
103  k++;
104  i_[2*k]=i;
105  i++;
106  while (i<=n&&s[j_s-1+i]==t[j_t-1+i])
107  i++;
108  i_[2*k+1]=i;
109  l_[k]=i_[2*k+1]-i_[2*k]+1;
110  i++;
111  while (i<=n&&s[j_s-1+i]!=t[j_t-1+i])
112  i++;
113  }
114  c+=bin(l_[0], 3)-2*bin(l_[0]-theta_m, 3)
115  +bin(l_[0]-2*theta_m, 3);
116  int32_t c1=0;
117  for (int32_t r=1; r<=k; r++)
118  {
119  c1+=bin(l_[r], 2)-bin(l_[r]-theta_m, 2);
120  }
121  c+=CMath::min(theta_m, i_[1]-i_[0])*c1;
122  }
123  }
124  }
125  SG_FREE(l_);
126  SG_FREE(i_);
127  return c;
128 }

SHOGUN Machine Learning Toolbox - Documentation