Skip to content

Commit

Permalink
Merge pull request #17 from kellieotto/permutations
Browse files Browse the repository at this point in the history
Permutations
  • Loading branch information
kellieotto authored Sep 13, 2018
2 parents 09191e5 + 04aa380 commit 604545b
Show file tree
Hide file tree
Showing 7 changed files with 237 additions and 29 deletions.
8 changes: 4 additions & 4 deletions RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ How to make a new release of ``cryptorandom``

- Publish on PyPI::

python setup.py register
python setup.py sdist upload
python setup.py bdist_wheel upload

python setup.py sdist
python setup.py bdist_wheel
twine upload dist/*
- Increase the version number

- In ``setup.py``, set to ``0.Xdev``.
Expand Down
129 changes: 119 additions & 10 deletions cryptorandom/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ def random_sample(a, size, replace=False, p=None, method="sample_by_index", prng
Sampling methods available are:
Fisher-Yates: sampling without weights, without replacement
PIKK: sampling without weights, without replacement (deprecated)
Cormen: sampling without weights, without replacement
PIKK: sampling without weights, without replacement
recursive: sampling without weights, without replacement
Waterman_R: sampling without weights, without replacement
Vitter_Z: sampling without weights, without replacement
sample_by_index: sampling without weights, without replacement
Exponential: sampling with weights, without replacement (deprecated)
Exponential: sampling with weights, without replacement
Elimination: sampling with weights, without replacement
...
Expand Down Expand Up @@ -121,6 +121,55 @@ def random_sample(a, size, replace=False, p=None, method="sample_by_index", prng
return a[sam]


def random_permutation(a, method="Fisher-Yates", prng=None):
    '''
    Construct a random permutation (re-ordering) of a population `a`.

    The algorithms available are:
        Fisher-Yates: a shuffling algorithm
        random_sort: generate random floats and sort
        permute_by_index: sample integer indices without replacement

    Parameters
    ----------
    a : 1-D array-like or int
        If a list, tuple or ndarray, a random permutation is generated from
        its elements.
        If an int, the random permutation is generated as if a were
        np.arange(a); the int must be positive.
    method : string
        Which permutation algorithm to use (one of the names listed above).
    prng : {None, int, object}
        Handed to `get_prng` to obtain the generator:
        if None, a randomly seeded instance of SHA256;
        if an int, a new SHA256 instance seeded with that value;
        if already a PRNG instance, it is used as is.

    Returns
    -------
    permuted : ndarray
        The elements of `a` in random order.

    Raises
    ------
    ValueError
        If `a` is neither an integer nor a list/tuple/ndarray, if an integer
        `a` is not positive, or if `method` is not a known algorithm.
    '''
    if isinstance(a, (list, tuple, np.ndarray)):
        N = len(a)
        a = np.array(a)
    elif isinstance(a, (int, np.integer)):
        N = int(a)
        # Explicit raise rather than assert: asserts vanish under `python -O`.
        if N <= 0:
            raise ValueError("Population size must be positive")
        a = np.arange(N)
    else:
        raise ValueError("a must be an integer or array-like")

    # Each entry returns a permutation of 1, ..., n (1-indexed).
    methods = {
        "Fisher-Yates": lambda n, rng: fykd_shuffle(n, prng=rng),
        "random_sort": lambda n, rng: pikk_shuffle(n, prng=rng),
        "permute_by_index": lambda n, rng: permute_by_index(n, prng=rng),
    }
    if method not in methods:
        # Fail loudly: printing and carrying on would hit an unbound index
        # array at the return statement.
        raise ValueError(
            "Bad permutation algorithm: {!r}; choose one of {}".format(
                method, sorted(methods)))

    prng = get_prng(prng)
    # Built-in int instead of the removed `np.int` alias.
    perm = np.array(methods[method](N, prng), dtype=int) - 1  # shift to 0 indexing
    return a[perm]


###################### Sampling functions #####################################

def fykd_sample(n, k, prng=None):
Expand All @@ -142,7 +191,7 @@ def fykd_sample(n, k, prng=None):
list of items sampled
'''
prng = get_prng(prng)
a = list(range(1, n+1))
a = np.array(range(1, n+1))
rand = prng.random(k)
ind = np.array(range(k))
JJ = np.array(ind + rand*(n - ind), dtype=int)
Expand Down Expand Up @@ -201,14 +250,14 @@ def recursive_sample(n, k, prng=None):
'''
prng = get_prng(prng)
if k == 0:
return []
return np.empty(0, dtype=np.int)
else:
S = recursive_sample(n-1, k-1, prng=prng)
i = prng.randint(1, n+1)
if i in S:
S.append(n)
S = np.append(S, [n])
else:
S.append(i)
S = np.append(S, [i])
return S


Expand All @@ -232,7 +281,7 @@ def waterman_r(n, k, prng=None):
list of items sampled
'''
prng = get_prng(prng)
S = list(range(1, k+1)) # fill the reservoir
S = np.array(range(1, k+1)) # fill the reservoir
for t in range(k+1, n+1):
i = prng.randint(1, t+1)
if i <= k:
Expand Down Expand Up @@ -286,7 +335,7 @@ def h(x, t):
def c(t):
return (t+1)/(t-k+1)

sam = list(range(1, k+1)) # fill the reservoir
sam = np.array(range(1, k+1)) # fill the reservoir
t = k

while t <= n:
Expand Down Expand Up @@ -343,7 +392,7 @@ def sample_by_index(n, k, prng=None):
if w < nprime:
Pop[w-1] = lastvalue # Move last population item to the wth position
nprime = nprime - 1
return S
return np.array(S)


def elimination_sample(k, p, replace=True, prng=None):
Expand Down Expand Up @@ -451,3 +500,63 @@ def exponential_sample(k, p, prng=None):
sam = -np.log(sam)/weights
sample = sam.argsort()[0:k]
return sample+1

######################## Permutation functions #################################

def fykd_shuffle(n, prng=None):
    '''
    Permute 1, ..., n with the Fisher-Yates-Knuth-Durstenfeld algorithm.

    Implemented as a Fisher-Yates sample of size n out of n: this delegates
    to `fykd_sample` with the sample size equal to the population size.

    Parameters
    ----------
    n : int
        Population size
    prng : {None, int, object}
        Forwarded unchanged to `fykd_sample`.
        If None, a randomly seeded instance of SHA256 is used.
        If an int, a new SHA256 instance seeded with it is used.
        If already a PRNG instance, it is used directly.

    Returns
    -------
    permuted ordering of {1, ..., n}, as returned by `fykd_sample`
    '''
    population_size = sample_size = n
    return fykd_sample(population_size, sample_size, prng=prng)


def pikk_shuffle(n, prng=None):
    '''
    Permute 1, ..., n by sorting on random keys.

    One random value is drawn per item via `prng.random(n)`; the permutation
    is the 1-based order of indices that sorts those values.

    Parameters
    ----------
    n : int
        Population size
    prng : {None, int, object}
        Resolved through `get_prng`.
        If None, a randomly seeded instance of SHA256 is used.
        If an int, a new SHA256 instance seeded with it is used.
        If already a PRNG instance, it is used directly.

    Returns
    -------
    ndarray holding a permutation of {1, ..., n}
    '''
    generator = get_prng(prng)
    sort_keys = generator.random(n)
    ordering = np.argsort(sort_keys)
    return ordering + 1  # argsort is 0-based; shift to 1-based labels


def permute_by_index(n, prng=None):
    '''
    Permute 1, ..., n by selecting indices uniformly at random without
    replacement.

    Delegates to `sample_by_index`, asking for a sample of size n from a
    population of size n.

    Parameters
    ----------
    n : int
        Population size
    prng : {None, int, object}
        Forwarded unchanged to `sample_by_index`.
        If None, a randomly seeded instance of SHA256 is used.
        If an int, a new SHA256 instance seeded with it is used.
        If already a PRNG instance, it is used directly.

    Returns
    -------
    permuted ordering of {1, ..., n}, as returned by `sample_by_index`
    '''
    population_size = sample_size = n
    return sample_by_index(population_size, sample_size, prng=prng)
56 changes: 50 additions & 6 deletions cryptorandom/tests/test_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_fykd():
"""
ff = fake_generator()
sam = fykd_sample(5, 2, prng=ff)
assert sam == [1, 2]
assert (sam == [1, 2]).all()

ff = fake_generator()
sam = random_sample(5, 2, method="Fisher-Yates", prng=ff)
Expand Down Expand Up @@ -163,7 +163,7 @@ def test_recursive_sample():
"""
ff = fake_generator()
sam = recursive_sample(5, 2, prng=ff)
assert sam == [2, 3]
assert (sam == [2, 3]).all()

ff = fake_generator()
sam = random_sample(5, 2, method="recursive", prng=ff)
Expand All @@ -181,7 +181,7 @@ def test_waterman_r():
"""
ff = fake_generator()
sam = waterman_r(5, 2, prng=ff)
assert sam == [1, 3]
assert (sam == [1, 3]).all()

ff = fake_generator()
sam = random_sample(5, 2, method="Waterman_R", prng=ff)
Expand All @@ -194,7 +194,7 @@ def test_sbi():
"""
ff = fake_generator()
sam = sample_by_index(5, 2, prng=ff)
assert sam == [2, 3]
assert (sam == [2, 3]).all()

ff = fake_generator()
sam = random_sample(5, 2, method="sample_by_index", prng=ff)
Expand All @@ -207,15 +207,15 @@ def test_vitter_z():
"""
ff = fake_generator()
sam = vitter_z(5, 2, prng=ff)
assert sam == [4, 2]
assert (sam == [4, 2]).all()

ff = fake_generator()
sam = random_sample(5, 2, method="Vitter_Z", prng=ff)
assert (sam+1 == [4, 2]).all() # shift to 1-index

ff = fake_generator()
sam = vitter_z(500, 2, prng=ff)
assert sam == [420, 265]
assert (sam == [420, 265]).all()

ff = fake_generator()
sam = random_sample(500, 2, method="Vitter_Z", prng=ff)
Expand Down Expand Up @@ -254,3 +254,47 @@ def test_exponential_sample():
ff = fake_generator()
sam = random_sample(5, 2, p=[0.2]*5, replace=False, method="Exponential", prng=ff)
assert (sam+1 == [5, 4]).all() # shift to 1-index


def test_fykd_shuffle():
    """
    Check the Fisher-Yates permutation, both directly through fykd_shuffle
    and through random_permutation, using the deterministic fake generator.
    """
    identity = [1, 2, 3, 4, 5]

    # Direct call returns 1-indexed labels.
    direct = fykd_shuffle(5, prng=fake_generator())
    assert (direct == identity).all()

    # The dispatcher returns 0-indexed values for an integer population.
    dispatched = random_permutation(5, method="Fisher-Yates",
                                    prng=fake_generator())
    assert (dispatched+1 == identity).all()  # shift to 1-index

    # A list population comes back as its (re-ordered) elements.
    fruit = ['apple', 'banana', 'cherry', 'pear', 'plum']
    shuffled_fruit = random_permutation(fruit, method="Fisher-Yates",
                                        prng=fake_generator())
    assert (shuffled_fruit == fruit).all()


def test_pikk_shuffle():
    """
    Check the sort-by-random-keys permutation, both directly through
    pikk_shuffle and through random_permutation(method="random_sort").
    """
    identity = [1, 2, 3, 4, 5]

    # Direct call returns 1-indexed labels.
    direct = pikk_shuffle(5, prng=fake_generator())
    assert (direct == identity).all()

    # The dispatcher returns 0-indexed values for an integer population.
    dispatched = random_permutation(5, method="random_sort",
                                    prng=fake_generator())
    assert (dispatched+1 == identity).all()  # shift to 1-index


def test_permute_by_index():
    """
    Check the index-sampling permutation, both directly through
    permute_by_index and through random_permutation.
    """
    expected = [2, 3, 1, 4, 5]

    # Direct call returns 1-indexed labels.
    direct = permute_by_index(5, prng=fake_generator())
    assert (direct == expected).all()

    # The dispatcher returns 0-indexed values for an integer population.
    dispatched = random_permutation(5, method="permute_by_index",
                                    prng=fake_generator())
    assert (dispatched+1 == expected).all()  # shift to 1-index
2 changes: 1 addition & 1 deletion cryptorandom/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# THIS FILE IS GENERATED FROM THE CRYPTORANDOM SETUP.PY
version='0.1dev'
version='0.1'
3 changes: 2 additions & 1 deletion docs/examples/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ Contents:
:maxdepth: 2

prng.rst
sample.rst
sample.rst
permutations.rst
50 changes: 50 additions & 0 deletions docs/examples/permutations.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
Permuting a list
----------------

.. code::
>>> from cryptorandom.cryptorandom import SHA256
>>> from cryptorandom.sample import random_permutation
>>> import numpy as np
The `sample` module contains methods for generating random permutations, compatible with any pseudorandom number generator that has `randint` and `random` methods. The module includes several algorithms to permute lists or arrays.

The main workhorse is the `random_permutation` function. The default algorithm is `Fisher-Yates`, a shuffling method.

.. code::
>>> fruit = ['apple', 'banana', 'cherry', 'pear', 'plum']
>>> s = SHA256(1234567890)
>>> random_permutation(fruit, prng=s)
array(['plum', 'apple', 'banana', 'pear', 'cherry'], dtype='<U6')
NumPy and the base random module offer methods for generating random permutations, but don't allow you to choose the pseudorandom number generator. NumPy's `permutation` and `shuffle` methods also use the Fisher-Yates method.

.. code::
>>> np.random.permutation(fruit) # Returns permuted list
array(['apple', 'banana', 'plum', 'cherry', 'pear'], dtype='<U6')
>>> np.random.shuffle(fruit) # Permutes the list in place, returns None
>>> fruit
['cherry', 'plum', 'pear', 'banana', 'apple']
The permutation algorithms available are:

================ ===============================================
Method description
================ ===============================================
Fisher-Yates a shuffling algorithm
random_sort generate random floats and sort
permute_by_index sample integer indices without replacement
================ ===============================================

.. code::
>>> %timeit random_permutation(fruit, method="Fisher-Yates", prng=s)
10000 loops, best of 3: 53.3 µs per loop
>>> %timeit random_permutation(fruit, method="random_sort", prng=s)
10000 loops, best of 3: 37.5 µs per loop
>>> %timeit random_permutation(fruit, method="permute_by_index", prng=s)
10000 loops, best of 3: 22 µs per loop
Loading

0 comments on commit 604545b

Please sign in to comment.