Created starter files for the project.

This commit is contained in:
Batuhan Berk Başoğlu 2020-10-02 21:26:03 -04:00
commit 73f0c0db42
1992 changed files with 769897 additions and 0 deletions

View file

@ -0,0 +1,40 @@
"""
Use cffi to access any of the underlying C functions from distributions.h
"""
import os
import numpy as np
import cffi
from .parse import parse_distributions_h
ffi = cffi.FFI()
inc_dir = os.path.join(np.get_include(), 'numpy')
# Basic numpy types
ffi.cdef('''
typedef intptr_t npy_intp;
typedef unsigned char npy_bool;
''')
parse_distributions_h(ffi, inc_dir)
lib = ffi.dlopen(np.random._generator.__file__)
# Compare the distributions.h random_standard_normal_fill to
# Generator.standard_random
bit_gen = np.random.PCG64()
rng = np.random.Generator(bit_gen)
state = bit_gen.state
interface = rng.bit_generator.cffi
n = 100
vals_cffi = ffi.new('double[%d]' % n)
lib.random_standard_normal_fill(interface.bit_generator, n, vals_cffi)
# reset the state
bit_gen.state = state
vals = rng.standard_normal(n)
for i in range(n):
assert vals[i] == vals_cffi[i]

View file

@ -0,0 +1,46 @@
import os
def parse_distributions_h(ffi, inc_dir):
"""
Parse distributions.h located in inc_dir for CFFI, filling in the ffi.cdef
Read the function declarations without the "#define ..." macros that will
be filled in when loading the library.
"""
with open(os.path.join(inc_dir, 'random', 'bitgen.h')) as fid:
s = []
for line in fid:
# massage the include file
if line.strip().startswith('#'):
continue
s.append(line)
ffi.cdef('\n'.join(s))
with open(os.path.join(inc_dir, 'random', 'distributions.h')) as fid:
s = []
in_skip = 0
for line in fid:
# massage the include file
if line.strip().startswith('#'):
continue
# skip any inlined function definition
# which starts with 'static NPY_INLINE xxx(...) {'
# and ends with a closing '}'
if line.strip().startswith('static NPY_INLINE'):
in_skip += line.count('{')
continue
elif in_skip > 0:
in_skip += line.count('{')
in_skip -= line.count('}')
continue
# replace defines with their value or remove them
line = line.replace('DECLDIR', '')
line = line.replace('NPY_INLINE', '')
line = line.replace('RAND_INT_TYPE', 'int64_t')
s.append(line)
ffi.cdef('\n'.join(s))

View file

@ -0,0 +1,78 @@
#!/usr/bin/env python3
#cython: language_level=3
from libc.stdint cimport uint32_t
from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
import numpy as np
cimport numpy as np
cimport cython
from numpy.random cimport bitgen_t
from numpy.random import PCG64
np.import_array()
@cython.boundscheck(False)
@cython.wraparound(False)
def uniform_mean(Py_ssize_t n):
cdef Py_ssize_t i
cdef bitgen_t *rng
cdef const char *capsule_name = "BitGenerator"
cdef double[::1] random_values
cdef np.ndarray randoms
x = PCG64()
capsule = x.capsule
if not PyCapsule_IsValid(capsule, capsule_name):
raise ValueError("Invalid pointer to anon_func_state")
rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name)
random_values = np.empty(n)
# Best practice is to acquire the lock whenever generating random values.
# This prevents other threads from modifying the state. Acquiring the lock
# is only necessary if if the GIL is also released, as in this example.
with x.lock, nogil:
for i in range(n):
random_values[i] = rng.next_double(rng.state)
randoms = np.asarray(random_values)
return randoms.mean()
# This function is declared nogil so it can be used without the GIL below
cdef uint32_t bounded_uint(uint32_t lb, uint32_t ub, bitgen_t *rng) nogil:
cdef uint32_t mask, delta, val
mask = delta = ub - lb
mask |= mask >> 1
mask |= mask >> 2
mask |= mask >> 4
mask |= mask >> 8
mask |= mask >> 16
val = rng.next_uint32(rng.state) & mask
while val > delta:
val = rng.next_uint32(rng.state) & mask
return lb + val
@cython.boundscheck(False)
@cython.wraparound(False)
def bounded_uints(uint32_t lb, uint32_t ub, Py_ssize_t n):
cdef Py_ssize_t i
cdef bitgen_t *rng
cdef uint32_t[::1] out
cdef const char *capsule_name = "BitGenerator"
x = PCG64()
out = np.empty(n, dtype=np.uint32)
capsule = x.capsule
if not PyCapsule_IsValid(capsule, capsule_name):
raise ValueError("Invalid pointer to anon_func_state")
rng = <bitgen_t *>PyCapsule_GetPointer(capsule, capsule_name)
with x.lock, nogil:
for i in range(n):
out[i] = bounded_uint(lb, ub, rng)
return np.asarray(out)

View file

@ -0,0 +1,117 @@
#!/usr/bin/env python3
#cython: language_level=3
"""
This file shows how the to use a BitGenerator to create a distribution.
"""
import numpy as np
cimport numpy as np
cimport cython
from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
from libc.stdint cimport uint16_t, uint64_t
from numpy.random cimport bitgen_t
from numpy.random import PCG64
from numpy.random.c_distributions cimport (
random_standard_uniform_fill, random_standard_uniform_fill_f)
@cython.boundscheck(False)
@cython.wraparound(False)
def uniforms(Py_ssize_t n):
"""
Create an array of `n` uniformly distributed doubles.
A 'real' distribution would want to process the values into
some non-uniform distribution
"""
cdef Py_ssize_t i
cdef bitgen_t *rng
cdef const char *capsule_name = "BitGenerator"
cdef double[::1] random_values
x = PCG64()
capsule = x.capsule
# Optional check that the capsule if from a BitGenerator
if not PyCapsule_IsValid(capsule, capsule_name):
raise ValueError("Invalid pointer to anon_func_state")
# Cast the pointer
rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name)
random_values = np.empty(n, dtype='float64')
with x.lock, nogil:
for i in range(n):
# Call the function
random_values[i] = rng.next_double(rng.state)
randoms = np.asarray(random_values)
return randoms
# cython example 2
@cython.boundscheck(False)
@cython.wraparound(False)
def uint10_uniforms(Py_ssize_t n):
"""Uniform 10 bit integers stored as 16-bit unsigned integers"""
cdef Py_ssize_t i
cdef bitgen_t *rng
cdef const char *capsule_name = "BitGenerator"
cdef uint16_t[::1] random_values
cdef int bits_remaining
cdef int width = 10
cdef uint64_t buff, mask = 0x3FF
x = PCG64()
capsule = x.capsule
if not PyCapsule_IsValid(capsule, capsule_name):
raise ValueError("Invalid pointer to anon_func_state")
rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name)
random_values = np.empty(n, dtype='uint16')
# Best practice is to release GIL and acquire the lock
bits_remaining = 0
with x.lock, nogil:
for i in range(n):
if bits_remaining < width:
buff = rng.next_uint64(rng.state)
random_values[i] = buff & mask
buff >>= width
randoms = np.asarray(random_values)
return randoms
# cython example 3
def uniforms_ex(bit_generator, Py_ssize_t n, dtype=np.float64):
"""
Create an array of `n` uniformly distributed doubles via a "fill" function.
A 'real' distribution would want to process the values into
some non-uniform distribution
Parameters
----------
bit_generator: BitGenerator instance
n: int
Output vector length
dtype: {str, dtype}, optional
Desired dtype, either 'd' (or 'float64') or 'f' (or 'float32'). The
default dtype value is 'd'
"""
cdef Py_ssize_t i
cdef bitgen_t *rng
cdef const char *capsule_name = "BitGenerator"
cdef np.ndarray randoms
capsule = bit_generator.capsule
# Optional check that the capsule if from a BitGenerator
if not PyCapsule_IsValid(capsule, capsule_name):
raise ValueError("Invalid pointer to anon_func_state")
# Cast the pointer
rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name)
_dtype = np.dtype(dtype)
randoms = np.empty(n, dtype=_dtype)
if _dtype == np.float32:
with bit_generator.lock:
random_standard_uniform_fill_f(rng, n, <float*>np.PyArray_DATA(randoms))
elif _dtype == np.float64:
with bit_generator.lock:
random_standard_uniform_fill(rng, n, <double*>np.PyArray_DATA(randoms))
else:
raise TypeError('Unsupported dtype %r for random' % _dtype)
return randoms

View file

@ -0,0 +1,41 @@
#!/usr/bin/env python3
"""
Build the Cython demonstrations of low-level access to NumPy random
Usage: python setup.py build_ext -i
"""
import numpy as np
from distutils.core import setup
from Cython.Build import cythonize
from setuptools.extension import Extension
from os.path import join, dirname
path = dirname(__file__)
src_dir = join(dirname(path), '..', 'src')
defs = [('NPY_NO_DEPRECATED_API', 0)]
inc_path = np.get_include()
# not so nice. We need the random/lib library from numpy
lib_path = join(np.get_include(), '..', '..', 'random', 'lib')
extending = Extension("extending",
sources=[join(path, 'extending.pyx')],
include_dirs=[
np.get_include(),
join(path, '..', '..')
],
define_macros=defs,
)
distributions = Extension("extending_distributions",
sources=[join(path, 'extending_distributions.pyx')],
include_dirs=[inc_path],
library_dirs=[lib_path],
libraries=['npyrandom'],
define_macros=defs,
)
extensions = [extending, distributions]
setup(
ext_modules=cythonize(extensions)
)

View file

@ -0,0 +1,84 @@
import numpy as np
import numba as nb
from numpy.random import PCG64
from timeit import timeit
bit_gen = PCG64()
next_d = bit_gen.cffi.next_double
state_addr = bit_gen.cffi.state_address
def normals(n, state):
out = np.empty(n)
for i in range((n + 1) // 2):
x1 = 2.0 * next_d(state) - 1.0
x2 = 2.0 * next_d(state) - 1.0
r2 = x1 * x1 + x2 * x2
while r2 >= 1.0 or r2 == 0.0:
x1 = 2.0 * next_d(state) - 1.0
x2 = 2.0 * next_d(state) - 1.0
r2 = x1 * x1 + x2 * x2
f = np.sqrt(-2.0 * np.log(r2) / r2)
out[2 * i] = f * x1
if 2 * i + 1 < n:
out[2 * i + 1] = f * x2
return out
# Compile using Numba
normalsj = nb.jit(normals, nopython=True)
# Must use state address not state with numba
n = 10000
def numbacall():
return normalsj(n, state_addr)
rg = np.random.Generator(PCG64())
def numpycall():
return rg.normal(size=n)
# Check that the functions work
r1 = numbacall()
r2 = numpycall()
assert r1.shape == (n,)
assert r1.shape == r2.shape
t1 = timeit(numbacall, number=1000)
print('{:.2f} secs for {} PCG64 (Numba/PCG64) gaussian randoms'.format(t1, n))
t2 = timeit(numpycall, number=1000)
print('{:.2f} secs for {} PCG64 (NumPy/PCG64) gaussian randoms'.format(t2, n))
# example 2
next_u32 = bit_gen.ctypes.next_uint32
ctypes_state = bit_gen.ctypes.state
@nb.jit(nopython=True)
def bounded_uint(lb, ub, state):
mask = delta = ub - lb
mask |= mask >> 1
mask |= mask >> 2
mask |= mask >> 4
mask |= mask >> 8
mask |= mask >> 16
val = next_u32(state) & mask
while val > delta:
val = next_u32(state) & mask
return lb + val
print(bounded_uint(323, 2394691, ctypes_state.value))
@nb.jit(nopython=True)
def bounded_uints(lb, ub, n, state):
out = np.empty(n, dtype=np.uint32)
for i in range(n):
out[i] = bounded_uint(lb, ub, state)
bounded_uints(323, 2394691, 10000000, ctypes_state.value)

View file

@ -0,0 +1,67 @@
r"""
Building the required library in this example requires a source distribution
of NumPy or clone of the NumPy git repository since distributions.c is not
included in binary distributions.
On *nix, execute in numpy/random/src/distributions
export ${PYTHON_VERSION}=3.8 # Python version
export PYTHON_INCLUDE=#path to Python's include folder, usually \
${PYTHON_HOME}/include/python${PYTHON_VERSION}m
export NUMPY_INCLUDE=#path to numpy's include folder, usually \
${PYTHON_HOME}/lib/python${PYTHON_VERSION}/site-packages/numpy/core/include
gcc -shared -o libdistributions.so -fPIC distributions.c \
-I${NUMPY_INCLUDE} -I${PYTHON_INCLUDE}
mv libdistributions.so ../../_examples/numba/
On Windows
rem PYTHON_HOME and PYTHON_VERSION are setup dependent, this is an example
set PYTHON_HOME=c:\Anaconda
set PYTHON_VERSION=38
cl.exe /LD .\distributions.c -DDLL_EXPORT \
-I%PYTHON_HOME%\lib\site-packages\numpy\core\include \
-I%PYTHON_HOME%\include %PYTHON_HOME%\libs\python%PYTHON_VERSION%.lib
move distributions.dll ../../_examples/numba/
"""
import os
import numba as nb
import numpy as np
from cffi import FFI
from numpy.random import PCG64
ffi = FFI()
if os.path.exists('./distributions.dll'):
lib = ffi.dlopen('./distributions.dll')
elif os.path.exists('./libdistributions.so'):
lib = ffi.dlopen('./libdistributions.so')
else:
raise RuntimeError('Required DLL/so file was not found.')
ffi.cdef("""
double random_standard_normal(void *bitgen_state);
""")
x = PCG64()
xffi = x.cffi
bit_generator = xffi.bit_generator
random_standard_normal = lib.random_standard_normal
def normals(n, bit_generator):
out = np.empty(n)
for i in range(n):
out[i] = random_standard_normal(bit_generator)
return out
normalsj = nb.jit(normals, nopython=True)
# Numba requires a memory address for void *
# Can also get address from x.ctypes.bit_generator.value
bit_generator_address = int(ffi.cast('uintptr_t', bit_generator))
norm = normalsj(1000, bit_generator_address)
print(norm[:12])