From e6b8729ee31fb950c932d70bb991b3effa2216ec Mon Sep 17 00:00:00 2001 From: Kellie Ottoboni Date: Wed, 12 Sep 2018 14:09:27 -0700 Subject: [PATCH 1/4] DOC: release-related doc updates --- RELEASE.txt | 8 ++++---- cryptorandom/version.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/RELEASE.txt b/RELEASE.txt index 848a814..20f2ba3 100644 --- a/RELEASE.txt +++ b/RELEASE.txt @@ -35,10 +35,10 @@ How to make a new release of ``cryptorandom`` - Publish on PyPi:: - python setup.py register - python setup.py sdist upload - python setup.py bdist_wheel upload - + python setup.py sdist + python setup.py bdist_wheel + twine upload dist/* + - Increase the version number - In ``setup.py``, set to ``0.Xdev``. diff --git a/cryptorandom/version.py b/cryptorandom/version.py index d7bbe44..38a24c3 100644 --- a/cryptorandom/version.py +++ b/cryptorandom/version.py @@ -1,2 +1,2 @@ # THIS FILE IS GENERATED FROM THE CRYPTORANDOM SETUP.PY -version='0.1dev' +version='0.1' From aa4a37f9fb67d82f0d5a175fa40772caf6367280 Mon Sep 17 00:00:00 2001 From: Kellie Ottoboni Date: Thu, 13 Sep 2018 11:14:07 -0700 Subject: [PATCH 2/4] ENH: add random_permutation functionality and make sure all funs return np.array --- cryptorandom/sample.py | 129 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 119 insertions(+), 10 deletions(-) diff --git a/cryptorandom/sample.py b/cryptorandom/sample.py index 45d76f6..ae22568 100644 --- a/cryptorandom/sample.py +++ b/cryptorandom/sample.py @@ -42,13 +42,13 @@ def random_sample(a, size, replace=False, p=None, method="sample_by_index", prng Sampling methods available are: Fisher-Yates: sampling without weights, without replacement - PIKK: sampling without weights, without replacement (deprecated) - Cormen: samping without weights, without replacement + PIKK: sampling without weights, without replacement + recursive: samping without weights, without replacement Waterman_R: sampling without weights, without replacement Vitter_Z: sampling 
without weights, without replacement sample_by_index: sampling without weights, without replacement - Exponential: sampling with weights, without replacement (deprecated) + Exponential: sampling with weights, without replacement Elimination: sampling with weights, without replacement ... @@ -121,6 +121,55 @@ def random_sample(a, size, replace=False, p=None, method="sample_by_index", prng return a[sam] +def random_permutation(a, method="Fisher-Yates", prng=None): + ''' + Construct a random permutation (re-ordering) of a population `a`. + + The algorithms available are: + Fisher-Yates: a shuffling algorithm + random_sort: generate random floats and sort + permute_by_index: sample integer indices without replacement + + Parameters + ---------- + a : 1-D array-like or int + If an array or list, a random permutation is generated from its elements. + If an int, the random permutation is generated as if a were np.arange(a) + method : string + Which sampling function? + prng : {None, int, object} + If prng is None, return a randomly seeded instance of SHA256. + If prng is an int, return a new SHA256 instance seeded with seed. + If prng is already a PRNG instance, return it. 
+ Returns + ------- + samples : single item or ndarray + The generated random samples + ''' + prng = get_prng(prng) + if isinstance(a, (list, np.ndarray)): + N = len(a) + a = np.array(a) + elif isinstance(a, int): + N = a + a = np.arange(N) + assert N > 0, "Population size must be nonnegative" + else: + raise ValueError("a must be an integer or array-like") + + methods = { + "Fisher-Yates" : lambda N: fykd_shuffle(N, prng=prng), + "random_sort" : lambda N: pikk_shuffle(N, prng=prng), + "permute_by_index" : lambda N: permute_by_index(N, prng=prng), + } + + try: + sam = np.array(methods[method](N), dtype=np.int) - 1 # shift to 0 indexing + except ValueError: + print("Bad permutation algorithm") + return a[sam] + + ###################### Sampling functions ##################################### def fykd_sample(n, k, prng=None): @@ -142,7 +191,7 @@ def fykd_sample(n, k, prng=None): list of items sampled ''' prng = get_prng(prng) - a = list(range(1, n+1)) + a = np.array(range(1, n+1)) rand = prng.random(k) ind = np.array(range(k)) JJ = np.array(ind + rand*(n - ind), dtype=int) @@ -201,14 +250,14 @@ def recursive_sample(n, k, prng=None): ''' prng = get_prng(prng) if k == 0: - return [] + return np.empty(0, dtype=np.int) else: S = recursive_sample(n-1, k-1, prng=prng) i = prng.randint(1, n+1) if i in S: - S.append(n) + S = np.append(S, [n]) else: - S.append(i) + S = np.append(S, [i]) return S @@ -232,7 +281,7 @@ def waterman_r(n, k, prng=None): list of items sampled ''' prng = get_prng(prng) - S = list(range(1, k+1)) # fill the reservoir + S = np.array(range(1, k+1)) # fill the reservoir for t in range(k+1, n+1): i = prng.randint(1, t+1) if i <= k: @@ -286,7 +335,7 @@ def h(x, t): def c(t): return (t+1)/(t-k+1) - sam = list(range(1, k+1)) # fill the reservoir + sam = np.array(range(1, k+1)) # fill the reservoir t = k while t <= n: @@ -343,7 +392,7 @@ def sample_by_index(n, k, prng=None): if w < nprime: Pop[w-1] = lastvalue # Move last population item to the wth position 
nprime = nprime - 1 - return S + return np.array(S) def elimination_sample(k, p, replace=True, prng=None): @@ -451,3 +500,63 @@ def exponential_sample(k, p, prng=None): sam = -np.log(sam)/weights sample = sam.argsort()[0:k] return sample+1 + +######################## Permutation functions ################################# + +def fykd_shuffle(n, prng=None): + ''' + Use Fisher-Yates-Knuth-Durstenfeld algorithm to permute 1, ..., n + + Parameters + ---------- + n : int + Population size + prng : {None, int, object} + If prng is None, return a randomly seeded instance of SHA256. + If prng is an int, return a new SHA256 instance seeded with seed. + If prng is already a PRNG instance, return it. + Returns + ------- + permuted list of {1, ..., n} + ''' + return fykd_sample(n, n, prng=prng) + + +def pikk_shuffle(n, prng=None): + ''' + Assign random values between 0 and 1 to the numbers 1, ..., n and sort them + according to these random values. + + Parameters + ---------- + n : int + Population size + prng : {None, int, object} + If prng is None, return a randomly seeded instance of SHA256. + If prng is an int, return a new SHA256 instance seeded with seed. + If prng is already a PRNG instance, return it. + Returns + ------- + list of items sampled + ''' + prng = get_prng(prng) + return np.argsort(prng.random(n)) + 1 + + +def permute_by_index(n, prng=None): + ''' + Select indices uniformly at random, without replacement, to permute 1, ..., n + + Parameters + ---------- + n : int + Population size + prng : {None, int, object} + If prng is None, return a randomly seeded instance of SHA256. + If prng is an int, return a new SHA256 instance seeded with seed. + If prng is already a PRNG instance, return it. 
+ Returns + ------- + list of items sampled + ''' + return sample_by_index(n, n, prng=prng) From 65d5a3350ec263fd48fc335535243f36eac076cc Mon Sep 17 00:00:00 2001 From: Kellie Ottoboni Date: Thu, 13 Sep 2018 11:14:33 -0700 Subject: [PATCH 3/4] TST: make sure tests work for np.array outputs and add tests for permutations --- cryptorandom/tests/test_sample.py | 56 +++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/cryptorandom/tests/test_sample.py b/cryptorandom/tests/test_sample.py index 1823f3f..3198a4e 100644 --- a/cryptorandom/tests/test_sample.py +++ b/cryptorandom/tests/test_sample.py @@ -132,7 +132,7 @@ def test_fykd(): """ ff = fake_generator() sam = fykd_sample(5, 2, prng=ff) - assert sam == [1, 2] + assert (sam == [1, 2]).all() ff = fake_generator() sam = random_sample(5, 2, method="Fisher-Yates", prng=ff) @@ -163,7 +163,7 @@ def test_recursive_sample(): """ ff = fake_generator() sam = recursive_sample(5, 2, prng=ff) - assert sam == [2, 3] + assert (sam == [2, 3]).all() ff = fake_generator() sam = random_sample(5, 2, method="recursive", prng=ff) @@ -181,7 +181,7 @@ def test_waterman_r(): """ ff = fake_generator() sam = waterman_r(5, 2, prng=ff) - assert sam == [1, 3] + assert (sam == [1, 3]).all() ff = fake_generator() sam = random_sample(5, 2, method="Waterman_R", prng=ff) @@ -194,7 +194,7 @@ def test_sbi(): """ ff = fake_generator() sam = sample_by_index(5, 2, prng=ff) - assert sam == [2, 3] + assert (sam == [2, 3]).all() ff = fake_generator() sam = random_sample(5, 2, method="sample_by_index", prng=ff) @@ -207,7 +207,7 @@ def test_vitter_z(): """ ff = fake_generator() sam = vitter_z(5, 2, prng=ff) - assert sam == [4, 2] + assert (sam == [4, 2]).all() ff = fake_generator() sam = random_sample(5, 2, method="Vitter_Z", prng=ff) @@ -215,7 +215,7 @@ def test_vitter_z(): ff = fake_generator() sam = vitter_z(500, 2, prng=ff) - assert sam == [420, 265] + assert (sam == [420, 265]).all() ff = fake_generator() sam = 
random_sample(500, 2, method="Vitter_Z", prng=ff) @@ -254,3 +254,47 @@ def test_exponential_sample(): ff = fake_generator() sam = random_sample(5, 2, p=[0.2]*5, replace=False, method="Exponential", prng=ff) assert (sam+1 == [5, 4]).all() # shift to 1-index + + +def test_fykd_shuffle(): + """ + Test Fisher-Yates shuffle for random permutations, fykd_shuffle + """ + ff = fake_generator() + sam = fykd_shuffle(5, prng=ff) + assert (sam == [1, 2, 3, 4, 5]).all() + + ff = fake_generator() + sam = random_permutation(5, method="Fisher-Yates", prng=ff) + assert (sam+1 == [1, 2, 3, 4, 5]).all() # shift to 1-index + + ff = fake_generator() + fruit = ['apple', 'banana', 'cherry', 'pear', 'plum'] + sam = random_permutation(fruit, method="Fisher-Yates", prng=ff) + assert (sam == fruit).all() + + +def test_pikk_shuffle(): + """ + Test PIKK shuffling + """ + ff = fake_generator() + sam = pikk_shuffle(5, prng=ff) + assert (sam == [1, 2, 3, 4, 5]).all() + + ff = fake_generator() + sam = random_permutation(5, method="random_sort", prng=ff) + assert (sam+1 == [1, 2, 3, 4, 5]).all() # shift to 1-index + + +def test_permute_by_index(): + """ + Test permuting by index shuffling + """ + ff = fake_generator() + sam = permute_by_index(5, prng=ff) + assert (sam == [2, 3, 1, 4, 5]).all() + + ff = fake_generator() + sam = random_permutation(5, method="permute_by_index", prng=ff) + assert (sam+1 == [2, 3, 1, 4, 5]).all() # shift to 1-index From 04aa38054e142c76174f388926055c2134a7241f Mon Sep 17 00:00:00 2001 From: Kellie Ottoboni Date: Thu, 13 Sep 2018 11:49:17 -0700 Subject: [PATCH 4/4] DOC: add permutations example page and clean up sample example --- docs/examples/index.rst | 3 +- docs/examples/permutations.rst | 50 ++++++++++++++++++++++++++++++++++ docs/examples/sample.rst | 18 +++++++----- 3 files changed, 63 insertions(+), 8 deletions(-) create mode 100644 docs/examples/permutations.rst diff --git a/docs/examples/index.rst b/docs/examples/index.rst index 9fce68c..fe5acd0 100644 --- 
a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -7,4 +7,5 @@ Contents: :maxdepth: 2 prng.rst - sample.rst \ No newline at end of file + sample.rst + permutations.rst \ No newline at end of file diff --git a/docs/examples/permutations.rst b/docs/examples/permutations.rst new file mode 100644 index 0000000..cafda12 --- /dev/null +++ b/docs/examples/permutations.rst @@ -0,0 +1,50 @@ +Permuting a list +---------------- + +.. code:: + + >>> from cryptorandom.cryptorandom import SHA256 + >>> from cryptorandom.sample import random_permutation + >>> import numpy as np + + +The `sample` module contains methods for generating random permutations, compatible with any pseudorandom number generator that has `randint` and `random` methods. The module includes several algorithms to permute lists or arrays. + +The main workhorse is the `random_permutation` function. The default algorithm is `Fisher-Yates`, a shuffling method. + +.. code:: + + >>> fruit = ['apple', 'banana', 'cherry', 'pear', 'plum'] + >>> s = SHA256(1234567890) + >>> random_permutation(fruit, prng=s) + array(['plum', 'apple', 'banana', 'pear', 'cherry'], dtype='<U6') + >>> np.random.permutation(fruit) # Returns permuted list + array(['apple', 'banana', 'plum', 'cherry', 'pear'], dtype='<U6') + >>> np.random.shuffle(fruit) # Permutes the list in place, returns None + >>> fruit + ['cherry', 'plum', 'pear', 'banana', 'apple'] + + +The permutation algorithms available are: + +================ =============================================== +Method description +================ =============================================== +Fisher-Yates a shuffling algorithm +random_sort generate random floats and sort +permute_by_index sample integer indices without replacement +================ =============================================== + +.. 
code:: + + >>> %timeit random_permutation(fruit, method="Fisher-Yates", prng=s) + 10000 loops, best of 3: 53.3 µs per loop + >>> %timeit random_permutation(fruit, method="random_sort", prng=s) + 10000 loops, best of 3: 37.5 µs per loop + >>> %timeit random_permutation(fruit, method="permute_by_index", prng=s) + 10000 loops, best of 3: 22 µs per loop diff --git a/docs/examples/sample.rst b/docs/examples/sample.rst index 1761c8f..a6fa984 100644 --- a/docs/examples/sample.rst +++ b/docs/examples/sample.rst @@ -7,19 +7,26 @@ Random sampling >>> from cryptorandom.sample import random_sample >>> import numpy as np +We provide a sampling module compatible with any pseudorandom number generator that has `randint` and `random` methods. The module includes a variety of algorithms for weighted or unweighted sampling, with or without replacement. -Numpy and the base random module offer methods for drawing simple random samples with and without replacement. The default is to use sampling indices without replacement: +The main workhorse is the `random_sample` function. The default sampling algorithm is `sample_by_index`, sampling indices without replacement. .. code:: - >>> fruit = ['apple', 'banana', 'cherry', 'pear', 'plum'] >>> s = SHA256(1234567890) >>> random_sample(fruit, 2, prng=s) array(['plum', 'cherry'], dtype='<U6') + >>> np.random.choice(fruit, 2) + array(['plum', 'apple'], dtype='<U6') >>> %timeit random_sample(fruit, 2, method="Fisher-Yates", prng=s)