SHOGUN  6.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
Random.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Viktor Gal
8  * Copyright (C) 2013 Viktor Gal
9  */
10 #ifdef _WIN32
11 #define _CRT_RAND_S
12 #include <stdlib.h>
13 #endif
14 
16 #include <shogun/base/Parameter.h>
17 #include <shogun/lib/external/SFMT/SFMT.h>
18 #include <shogun/lib/external/dSFMT/dSFMT.h>
19 #include <shogun/lib/Time.h>
20 #include <shogun/lib/Lock.h>
21 
22 #ifdef DEV_RANDOM
23 #include <fcntl.h>
24 #endif
25 
26 using namespace shogun;
27 
29  : m_sfmt_32(NULL),
30  m_sfmt_64(NULL),
31  m_dsfmt(NULL)
32 {
33  m_seed = CRandom::generate_seed();
34  init();
35 }
36 
37 CRandom::CRandom(uint32_t seed)
38  : m_seed(seed),
39  m_sfmt_32(NULL),
40  m_sfmt_64(NULL),
41  m_dsfmt(NULL)
42 {
43  init();
44 }
45 
47 {
48  SG_FREE(m_x);
49  SG_FREE(m_y);
50  SG_FREE(m_xComp);
51  SG_FREE(m_sfmt_32);
52  SG_FREE(m_sfmt_64);
53  SG_FREE(m_dsfmt);
54 }
55 
56 void CRandom::set_seed(uint32_t seed)
57 {
58  reinit(seed);
59 }
60 
61 uint32_t CRandom::get_seed() const
62 {
63  return m_seed;
64 }
65 
66 void CRandom::init()
67 {
69  m_blockCount = 128;
70  m_R = 3.442619855899;
71  m_A = 9.91256303526217e-3;
72  m_uint32ToU = 1.0 / (float64_t)std::numeric_limits<uint32_t>::max();
73 
74  m_x = SG_MALLOC(float64_t, m_blockCount + 1);
75  m_y = SG_MALLOC(float64_t, m_blockCount);
76  m_xComp = SG_MALLOC(uint32_t, m_blockCount);
77 
78  // Initialise rectangle position data.
79  // m_x[i] and m_y[i] describe the top-right position ox Box i.
80 
81  // Determine top right position of the base rectangle/box (the rectangle with the Gaussian tale attached).
82  // We call this Box 0 or B0 for short.
83  // Note. x[0] also describes the right-hand edge of B1. (See diagram).
84  m_x[0] = m_R;
85  m_y[0] = GaussianPdfDenorm(m_R);
86 
87  // The next box (B1) has a right hand X edge the same as B0.
88  // Note. B1's height is the box area divided by its width, hence B1 has a smaller height than B0 because
89  // B0's total area includes the attached distribution tail.
90  m_x[1] = m_R;
91  m_y[1] = m_y[0] + (m_A / m_x[1]);
92 
93  // Calc positions of all remaining rectangles.
94  for(int i=2; i < m_blockCount; i++)
95  {
96  m_x[i] = GaussianPdfDenormInv(m_y[i-1]);
97  m_y[i] = m_y[i-1] + (m_A / m_x[i]);
98  }
99 
100  // For completeness we define the right-hand edge of a notional box 6 as being zero (a box with no area).
101  m_x[m_blockCount] = 0.0;
102 
103  // Useful precomputed values.
104  m_A_div_y0 = m_A / m_y[0];
105 
106  // Special case for base box. m_xComp[0] stores the area of B0 as a proportion of R
107  // (recalling that all segments have area A, but that the base segment is the combination of B0 and the distribution tail).
108  // Thus -m_xComp[0] is the probability that a sample point is within the box part of the segment.
109  m_xComp[0] = (uint32_t)(((m_R * m_y[0]) / m_A) * (float64_t)std::numeric_limits<uint32_t>::max());
110 
111  for(int32_t i=1; i < m_blockCount-1; i++)
112  {
113  m_xComp[i] = (uint32_t)((m_x[i+1] / m_x[i]) * (float64_t)std::numeric_limits<uint32_t>::max());
114  }
115  m_xComp[m_blockCount-1] = 0; // Shown for completeness.
116 
117  // Sanity check. Test that the top edge of the topmost rectangle is at y=1.0.
118  // Note. We expect there to be a tiny drift away from 1.0 due to the inexactness of floating
119  // point arithmetic.
120  ASSERT(CMath::abs(1.0 - m_y[m_blockCount-1]) < 1e-10);
121 
123  m_sfmt_32 = SG_MALLOC(sfmt_t, 1);
124  m_sfmt_64 = SG_MALLOC(sfmt_t, 1);
125  m_dsfmt = SG_MALLOC(dsfmt_t, 1);
126  reinit(m_seed);
127 }
128 
129 uint32_t CRandom::random_32() const
130 {
131  m_state_lock.lock();
132  uint32_t v = sfmt_genrand_uint32(m_sfmt_32);
133  m_state_lock.unlock();
134  return v;
135 }
136 
137 uint64_t CRandom::random_64() const
138 {
139  m_state_lock.lock();
140  uint64_t v = sfmt_genrand_uint64(m_sfmt_64);
141  m_state_lock.unlock();
142  return v;
143 }
144 
145 void CRandom::fill_array(uint32_t* array, int32_t size) const
146 {
147 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
148  if ((size >= sfmt_get_min_array_size32(m_sfmt_32)) && (size % 4) == 0)
149  {
150  m_state_lock.lock();
151  sfmt_fill_array32(m_sfmt_32, array, size);
152  m_state_lock.unlock();
153  return;
154  }
155 #endif
156  for (int32_t i=0; i < size; i++)
157  array[i] = random_32();
158 }
159 
160 void CRandom::fill_array(uint64_t* array, int32_t size) const
161 {
162 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
163  if ((size >= sfmt_get_min_array_size64(m_sfmt_64)) && (size % 2) == 0)
164  {
165  m_state_lock.lock();
166  sfmt_fill_array64(m_sfmt_64, array, size);
167  m_state_lock.unlock();
168  return;
169  }
170 #endif
171  for (int32_t i=0; i < size; i++)
172  array[i] = random_64();
173 }
174 
175 void CRandom::fill_array_oc(float64_t* array, int32_t size) const
176 {
177  m_state_lock.lock();
178 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
179  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
180  {
181  dsfmt_fill_array_open_close(m_dsfmt, array, size);
182  m_state_lock.unlock();
183  return;
184  }
185 #endif
186  for (int32_t i=0; i < size; i++)
187  array[i] = dsfmt_genrand_open_close(m_dsfmt);
188  m_state_lock.unlock();
189 }
190 
191 void CRandom::fill_array_co(float64_t* array, int32_t size) const
192 {
193  m_state_lock.lock();
194 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
195  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
196  {
197  dsfmt_fill_array_close_open(m_dsfmt, array, size);
198  m_state_lock.unlock();
199  return;
200  }
201 #endif
202  for (int32_t i=0; i < size; i++)
203  array[i] = dsfmt_genrand_close_open(m_dsfmt);
204  m_state_lock.unlock();
205 }
206 
207 void CRandom::fill_array_oo(float64_t* array, int32_t size) const
208 {
209  m_state_lock.lock();
210 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
211  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
212  {
213  dsfmt_fill_array_open_open(m_dsfmt, array, size);
214  m_state_lock.unlock();
215  return;
216  }
217 #endif
218  for (int32_t i=0; i < size; i++)
219  array[i] = dsfmt_genrand_open_open(m_dsfmt);
220  m_state_lock.unlock();
221 }
222 
223 void CRandom::fill_array_c1o2(float64_t* array, int32_t size) const
224 {
225  m_state_lock.lock();
226 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
227  if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
228  {
229  dsfmt_fill_array_close1_open2(m_dsfmt, array, size);
230  m_state_lock.unlock();
231  return;
232  }
233 #endif
234  for (int32_t i=0; i < size; i++)
235  array[i] = dsfmt_genrand_close1_open2(m_dsfmt);
236  m_state_lock.unlock();
237 }
238 
240 {
241  m_state_lock.lock();
242  float64_t v = sfmt_genrand_real1(m_sfmt_32);
243  m_state_lock.unlock();
244  return v;
245 }
246 
248 {
249  m_state_lock.lock();
250  float64_t v = dsfmt_genrand_open_open(m_dsfmt);
251  m_state_lock.unlock();
252  return v;
253 }
254 
256 {
257  m_state_lock.lock();
258  float64_t v = dsfmt_genrand_close_open(m_dsfmt);
259  m_state_lock.unlock();
260  return v;
261 }
262 
264 {
265  return mu + (std_normal_distrib() * sigma);
266 }
267 
269 {
270  for (;;)
271  {
272  // Select box at random.
273  uint8_t u = random_32();
274  int32_t i = (int32_t)(u & 0x7F);
275  float64_t sign = ((u & 0x80) == 0) ? -1.0 : 1.0;
276 
277  // Generate uniform random value with range [0,0xffffffff].
278  uint32_t u2 = random_32();
279 
280  // Special case for the base segment.
281  if(0 == i)
282  {
283  if(u2 < m_xComp[0])
284  {
285  // Generated x is within R0.
286  return u2 * m_uint32ToU * m_A_div_y0 * sign;
287  }
288  // Generated x is in the tail of the distribution.
289  return sample_tail() * sign;
290  }
291 
292  // All other segments.
293  if(u2 < m_xComp[i])
294  { // Generated x is within the rectangle.
295  return u2 * m_uint32ToU * m_x[i] * sign;
296  }
297 
298  // Generated x is outside of the rectangle.
299  // Generate a random y coordinate and test if our (x,y) is within the distribution curve.
300  // This execution path is relatively slow/expensive (makes a call to Math.Exp()) but relatively rarely executed,
301  // although more often than the 'tail' path (above).
302  float64_t x = u2 * m_uint32ToU * m_x[i];
303  if(m_y[i-1] + ((m_y[i] - m_y[i-1]) * random_half_open()) < GaussianPdfDenorm(x) ) {
304  return x * sign;
305  }
306  }
307 }
308 
309 float64_t CRandom::sample_tail() const
310 {
311  float64_t x, y;
312  float64_t m_R_reciprocal = 1.0 / m_R;
313  do
314  {
315  x = -CMath::log(random_half_open()) * m_R_reciprocal;
317  } while(y+y < x*x);
318  return m_R + x;
319 }
320 
321 float64_t CRandom::GaussianPdfDenorm(float64_t x) const
322 {
323  return CMath::exp(-(x*x * 0.5));
324 }
325 
326 float64_t CRandom::GaussianPdfDenormInv(float64_t y) const
327 {
328  // Operates over the y range (0,1], which happens to be the y range of the pdf,
329  // with the exception that it does not include y=0, but we would never call with
330  // y=0 so it doesn't matter. Remember that a Gaussian effectively has a tail going
331  // off into x == infinity, hence asking what is x when y=0 is an invalid question
332  // in the context of this class.
333  return CMath::sqrt(-2.0 * CMath::log(y));
334 }
335 
336 void CRandom::reinit(uint32_t seed)
337 {
338  m_state_lock.lock();
339  m_seed = seed;
340  sfmt_init_gen_rand(m_sfmt_32, m_seed);
341  sfmt_init_gen_rand(m_sfmt_64, m_seed);
342  dsfmt_init_gen_rand(m_dsfmt, m_seed);
343  m_state_lock.unlock();
344 }
345 
347 {
348  uint32_t seed;
349 #if defined(_WIN32)
350  rand_s(&seed);
351 #elif defined(HAVE_ARC4RANDOM)
352  seed = arc4random();
353 #elif defined(DEV_RANDOM)
354  int fd = open(DEV_RANDOM, O_RDONLY);
355  ASSERT(fd >= 0);
356  ssize_t actual_read =
357  read(fd, reinterpret_cast<char*>(&seed), sizeof(seed));
358  close(fd);
359  ASSERT(actual_read == sizeof(seed));
360 #else
361  SG_SWARNING("Not safe seed for the PRNG\n");
362  struct timeval tv;
363  gettimeofday(&tv, NULL);
364  seed=(uint32_t) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
365 #endif
366  return seed;
367 }
void fill_array_c1o2(float64_t *array, int32_t size) const
Definition: Random.cpp:223
float64_t std_normal_distrib() const
Definition: Random.cpp:268
#define SG_SWARNING(...)
Definition: SGIO.h:177
void fill_array(uint32_t *array, int32_t size) const
Definition: Random.cpp:145
uint64_t random_64() const
Definition: Random.cpp:137
float64_t random_close() const
Definition: Random.cpp:239
uint32_t get_seed() const
Definition: Random.cpp:61
virtual ~CRandom()
Definition: Random.cpp:46
float64_t normal_distrib(float64_t mu, float64_t sigma) const
Definition: Random.cpp:263
#define ASSERT(x)
Definition: SGIO.h:200
void fill_array_oo(float64_t *array, int32_t size) const
Definition: Random.cpp:207
uint32_t random_32() const
Definition: Random.cpp:129
double float64_t
Definition: common.h:60
#define DEV_RANDOM
Definition: config.h:148
void fill_array_oc(float64_t *array, int32_t size) const
Definition: Random.cpp:175
SG_FORCED_INLINE void lock()
Definition: Lock.h:23
void fill_array_co(float64_t *array, int32_t size) const
Definition: Random.cpp:191
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
void set_seed(uint32_t seed)
Definition: Random.cpp:56
static float64_t exp(float64_t x)
Definition: Math.h:616
static float64_t log(float64_t v)
Definition: Math.h:917
float64_t random_half_open() const
Definition: Random.cpp:255
SG_FORCED_INLINE void unlock()
Definition: Lock.h:34
float64_t random_open() const
Definition: Random.cpp:247
static float32_t sqrt(float32_t x)
Definition: Math.h:454
static uint32_t generate_seed()
Definition: Random.cpp:346
T max(const Container< T > &a)
static T abs(T a)
Definition: Math.h:175

SHOGUN Machine Learning Toolbox - Documentation