SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Random.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Viktor Gal
8  * Copyright (C) 2013 Viktor Gal
9  */
10 
12 #include <shogun/base/Parameter.h>
13 #include <shogun/lib/external/SFMT/SFMT.h>
14 #include <shogun/lib/external/dSFMT/dSFMT.h>
15 #include <shogun/lib/Time.h>
16 #include <shogun/lib/Lock.h>
17 
18 #ifdef DEV_RANDOM
19 #include <fcntl.h>
20 #endif
21 
22 #ifdef _WIN32
23 #define _CRT_RAND_S
24 #include <stdlib.h>
25 #endif
26 
27 using namespace shogun;
28 
30  : m_sfmt_32(NULL),
31  m_sfmt_64(NULL),
32  m_dsfmt(NULL)
33 {
34  m_seed = CRandom::generate_seed();
35  init();
36 }
37 
38 CRandom::CRandom(uint32_t seed)
39  : m_seed(seed),
40  m_sfmt_32(NULL),
41  m_sfmt_64(NULL),
42  m_dsfmt(NULL)
43 {
44  init();
45 }
46 
48 {
49  SG_FREE(m_x);
50  SG_FREE(m_y);
51  SG_FREE(m_xComp);
52  SG_FREE(m_sfmt_32);
53  SG_FREE(m_sfmt_64);
54  SG_FREE(m_dsfmt);
55 }
56 
57 void CRandom::set_seed(uint32_t seed)
58 {
59  reinit(seed);
60 }
61 
62 uint32_t CRandom::get_seed() const
63 {
64  return m_seed;
65 }
66 
67 void CRandom::init()
68 {
70  m_blockCount = 128;
71  m_R = 3.442619855899;
72  m_A = 9.91256303526217e-3;
73  m_uint32ToU = 1.0 / (float64_t)std::numeric_limits<uint32_t>::max();
74 
75  m_x = SG_MALLOC(float64_t, m_blockCount + 1);
76  m_y = SG_MALLOC(float64_t, m_blockCount);
77  m_xComp = SG_MALLOC(uint32_t, m_blockCount);
78 
79  // Initialise rectangle position data.
80  // m_x[i] and m_y[i] describe the top-right position ox Box i.
81 
82  // Determine top right position of the base rectangle/box (the rectangle with the Gaussian tale attached).
83  // We call this Box 0 or B0 for short.
84  // Note. x[0] also describes the right-hand edge of B1. (See diagram).
85  m_x[0] = m_R;
86  m_y[0] = GaussianPdfDenorm(m_R);
87 
88  // The next box (B1) has a right hand X edge the same as B0.
89  // Note. B1's height is the box area divided by its width, hence B1 has a smaller height than B0 because
90  // B0's total area includes the attached distribution tail.
91  m_x[1] = m_R;
92  m_y[1] = m_y[0] + (m_A / m_x[1]);
93 
94  // Calc positions of all remaining rectangles.
95  for(int i=2; i < m_blockCount; i++)
96  {
97  m_x[i] = GaussianPdfDenormInv(m_y[i-1]);
98  m_y[i] = m_y[i-1] + (m_A / m_x[i]);
99  }
100 
101  // For completeness we define the right-hand edge of a notional box 6 as being zero (a box with no area).
102  m_x[m_blockCount] = 0.0;
103 
104  // Useful precomputed values.
105  m_A_div_y0 = m_A / m_y[0];
106 
107  // Special case for base box. m_xComp[0] stores the area of B0 as a proportion of R
108  // (recalling that all segments have area A, but that the base segment is the combination of B0 and the distribution tail).
109  // Thus -m_xComp[0] is the probability that a sample point is within the box part of the segment.
110  m_xComp[0] = (uint32_t)(((m_R * m_y[0]) / m_A) * (float64_t)std::numeric_limits<uint32_t>::max());
111 
112  for(int32_t i=1; i < m_blockCount-1; i++)
113  {
114  m_xComp[i] = (uint32_t)((m_x[i+1] / m_x[i]) * (float64_t)std::numeric_limits<uint32_t>::max());
115  }
116  m_xComp[m_blockCount-1] = 0; // Shown for completeness.
117 
118  // Sanity check. Test that the top edge of the topmost rectangle is at y=1.0.
119  // Note. We expect there to be a tiny drift away from 1.0 due to the inexactness of floating
120  // point arithmetic.
121  ASSERT(CMath::abs(1.0 - m_y[m_blockCount-1]) < 1e-10);
122 
124  m_sfmt_32 = SG_MALLOC(sfmt_t, 1);
125  m_sfmt_64 = SG_MALLOC(sfmt_t, 1);
126  m_dsfmt = SG_MALLOC(dsfmt_t, 1);
127  reinit(m_seed);
128 }
129 
130 uint32_t CRandom::random_32() const
131 {
132  return sfmt_genrand_uint32(m_sfmt_32);
133 }
134 
135 uint64_t CRandom::random_64() const
136 {
137  return sfmt_genrand_uint64(m_sfmt_64);
138 }
139 
140 void CRandom::fill_array(uint32_t* array, int32_t size) const
141 {
142 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
143  if ((size >= sfmt_get_min_array_size32(m_sfmt_32)) && (size % 4) == 0)
144  {
145  sfmt_fill_array32(m_sfmt_32, array, size);
146  return;
147  }
148 #endif
149  for (int32_t i=0; i < size; i++)
150  array[i] = random_32();
151 }
152 
153 void CRandom::fill_array(uint64_t* array, int32_t size) const
154 {
155 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
156  if ((size >= sfmt_get_min_array_size64(m_sfmt_64)) && (size % 2) == 0)
157  {
158  sfmt_fill_array64(m_sfmt_64, array, size);
159  return;
160  }
161 #endif
162  for (int32_t i=0; i < size; i++)
163  array[i] = random_64();
164 }
165 
166 void CRandom::fill_array_oc(float64_t* array, int32_t size) const
167 {
168 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
169  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
170  {
171  dsfmt_fill_array_open_close(m_dsfmt, array, size);
172  return;
173  }
174 #endif
175  for (int32_t i=0; i < size; i++)
176  array[i] = dsfmt_genrand_open_close(m_dsfmt);
177 }
178 
179 void CRandom::fill_array_co(float64_t* array, int32_t size) const
180 {
181 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
182  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
183  {
184  dsfmt_fill_array_close_open(m_dsfmt, array, size);
185  return;
186  }
187 #endif
188  for (int32_t i=0; i < size; i++)
189  array[i] = dsfmt_genrand_close_open(m_dsfmt);
190 }
191 
192 void CRandom::fill_array_oo(float64_t* array, int32_t size) const
193 {
194 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
195  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
196  {
197  dsfmt_fill_array_open_open(m_dsfmt, array, size);
198  return;
199  }
200 #endif
201  for (int32_t i=0; i < size; i++)
202  array[i] = dsfmt_genrand_open_open(m_dsfmt);
203 }
204 
205 void CRandom::fill_array_c1o2(float64_t* array, int32_t size) const
206 {
207 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
208  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
209  {
210  dsfmt_fill_array_close1_open2(m_dsfmt, array, size);
211  return;
212  }
213 #endif
214  for (int32_t i=0; i < size; i++)
215  array[i] = dsfmt_genrand_close1_open2(m_dsfmt);
216 }
217 
219 {
220  return sfmt_genrand_real1(m_sfmt_32);
221 }
222 
224 {
225  return dsfmt_genrand_open_open(m_dsfmt);
226 }
227 
229 {
230  return dsfmt_genrand_close_open(m_dsfmt);
231 }
232 
234 {
235  return mu + (std_normal_distrib() * sigma);
236 }
237 
239 {
240  for (;;)
241  {
242  // Select box at random.
243  uint8_t u = random_32();
244  int32_t i = (int32_t)(u & 0x7F);
245  float64_t sign = ((u & 0x80) == 0) ? -1.0 : 1.0;
246 
247  // Generate uniform random value with range [0,0xffffffff].
248  uint32_t u2 = random_32();
249 
250  // Special case for the base segment.
251  if(0 == i)
252  {
253  if(u2 < m_xComp[0])
254  {
255  // Generated x is within R0.
256  return u2 * m_uint32ToU * m_A_div_y0 * sign;
257  }
258  // Generated x is in the tail of the distribution.
259  return sample_tail() * sign;
260  }
261 
262  // All other segments.
263  if(u2 < m_xComp[i])
264  { // Generated x is within the rectangle.
265  return u2 * m_uint32ToU * m_x[i] * sign;
266  }
267 
268  // Generated x is outside of the rectangle.
269  // Generate a random y coordinate and test if our (x,y) is within the distribution curve.
270  // This execution path is relatively slow/expensive (makes a call to Math.Exp()) but relatively rarely executed,
271  // although more often than the 'tail' path (above).
272  float64_t x = u2 * m_uint32ToU * m_x[i];
273  if(m_y[i-1] + ((m_y[i] - m_y[i-1]) * random_half_open()) < GaussianPdfDenorm(x) ) {
274  return x * sign;
275  }
276  }
277 }
278 
279 float64_t CRandom::sample_tail() const
280 {
281  float64_t x, y;
282  do
283  {
284  x = -CMath::log(random_half_open()) / m_R;
286  } while(y+y < x*x);
287  return m_R + x;
288 }
289 
290 float64_t CRandom::GaussianPdfDenorm(float64_t x) const
291 {
292  return CMath::exp(-(x*x / 2.0));
293 }
294 
295 float64_t CRandom::GaussianPdfDenormInv(float64_t y) const
296 {
297  // Operates over the y range (0,1], which happens to be the y range of the pdf,
298  // with the exception that it does not include y=0, but we would never call with
299  // y=0 so it doesn't matter. Remember that a Gaussian effectively has a tail going
300  // off into x == infinity, hence asking what is x when y=0 is an invalid question
301  // in the context of this class.
302  return CMath::sqrt(-2.0 * CMath::log(y));
303 }
304 
305 void CRandom::reinit(uint32_t seed)
306 {
307  m_state_lock.lock();
308  m_seed = seed;
309  sfmt_init_gen_rand(m_sfmt_32, m_seed);
310  sfmt_init_gen_rand(m_sfmt_64, m_seed);
311  dsfmt_init_gen_rand(m_dsfmt, m_seed);
312  m_state_lock.unlock();
313 }
314 
316 {
317  uint32_t seed;
318 #if defined(_WIN32)
319  rand_s(&seed);
320 #elif defined(HAVE_ARC4RANDOM)
321  seed = arc4random();
322 #elif defined(DEV_RANDOM)
323  int fd = open(DEV_RANDOM, O_RDONLY);
324  ASSERT(fd >= 0);
325  ssize_t actual_read =
326  read(fd, reinterpret_cast<char*>(&seed), sizeof(seed));
327  close(fd);
328  ASSERT(actual_read == sizeof(seed));
329 #else
330  SG_SWARNING("Not safe seed for the PRNG\n");
331  struct timeval tv;
332  gettimeofday(&tv, NULL);
333  seed=(uint32_t) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
334 #endif
335  return seed;
336 }

SHOGUN Machine Learning Toolbox - Documentation