diff --git a/tests/test_mux.py b/tests/test_mux.py index 7f2ae0e..bd14d59 100644 --- a/tests/test_mux.py +++ b/tests/test_mux.py @@ -1,9 +1,8 @@ import pytest import collections -import itertools import numpy as np -import random +import scipy.stats import pescador import pescador.mux @@ -162,7 +161,6 @@ def test_mux_of_muxes_itered(): random_state=135) samples1 = mux1.iterate(max_iter=1000) count1 = collections.Counter(samples1) - print(count1) assert set('abcxyz') == set(count1.keys()) n123 = pescador.Streamer('123') @@ -172,7 +170,6 @@ def test_mux_of_muxes_itered(): random_state=246) samples2 = mux2.iterate(max_iter=1000) count2 = collections.Counter(samples2) - print(count2) assert set('123456') == set(count2.keys()) # Note that (random_state=987, k=2) fails. @@ -181,7 +178,6 @@ def test_mux_of_muxes_itered(): random_state=987) samples3 = mux3.iterate(max_iter=1000) count3 = collections.Counter(samples3) - print(count3) assert set('abcxyz123456') == set(count3.keys()) @@ -204,26 +200,37 @@ def test_mux_of_muxes_single(): prune_empty_streams=False) samples3 = list(mux3.iterate(max_iter=10000)) count3 = collections.Counter(samples3) - print(samples3[:10], count3) assert set('abcxyz123456') == set(count3.keys()) def test_critical_mux(): # Check on Issue #80 chars = 'abcde' - streamers = [pescador.Streamer(x * 5) for x in chars] + n_reps = 5 + streamers = [pescador.Streamer(x * n_reps) for x in chars] mux = pescador.Mux(streamers, k=len(chars), rate=None, - with_replacement=False, revive=True, + with_replacement=False, revive=False, prune_empty_streams=False, random_state=135) - samples = mux.iterate(max_iter=1000) - print(collections.Counter(samples)) + samples = list(mux.iterate(max_iter=1000)) + assert len(collections.Counter(samples)) == len(chars) + assert len(samples) == len(chars) * n_reps + + +def _choice(vals, seed=11111): + rng = np.random.RandomState(seed=seed) + n = len(vals) + while True: + yield vals[rng.randint(0, n)] + + +def _cycle(values): + 
while True: + for v in values: + yield v def test_critical_mux_of_rate_limited_muxes(): # Check on Issue #79 - def _choice(vals): - while True: - yield random.choice(vals) ab = pescador.Streamer(_choice, 'ab') cd = pescador.Streamer(_choice, 'cd') @@ -244,7 +251,6 @@ def _choice(vals): count = collections.Counter(samples) max_count, min_count = max(count.values()), min(count.values()) assert (max_count - min_count) / max_count < 0.2 - print(count) assert set('abcdefghijkl') == set(count.keys()) @@ -258,11 +264,6 @@ def test_restart_mux(): def test_sampled_mux_of_muxes(): - def _cycle(values): - while True: - for v in values: - yield v - # Build some sample streams ab = pescador.Streamer(_cycle, 'ab') cd = pescador.Streamer(_cycle, 'cd') @@ -273,7 +274,7 @@ def _cycle(values): # And inspect the first mux samples1 = list(mux1(max_iter=6 * 10)) count1 = collections.Counter(samples1) - print(count1) + assert set(count1.keys()) == set('abcdef') # Build another set of streams @@ -286,7 +287,6 @@ def _cycle(values): # And inspect the second mux samples2 = list(mux2(max_iter=6 * 10)) count2 = collections.Counter(samples2) - print(count2) assert set(count2.keys()) == set('ghijkl') # Merge the muxes together. @@ -294,7 +294,6 @@ def _cycle(values): with_replacement=False, revive=False) samples3 = list(mux3.iterate(max_iter=10000)) count3 = collections.Counter(samples3) - print(count3) assert set('abcdefghijkl') == set(count3.keys()) max_count, min_count = max(count3.values()), min(count3.values()) assert (max_count - min_count) / max_count < 0.2 @@ -310,3 +309,39 @@ def test_mux_inf_loop(): with_replacement=False, random_state=1234) assert len(list(mux(max_iter=100))) == 0 + + +def test_mux_stacked_uniform_convergence(): + """This test is designed to check that bootstrapped streams of data + (Streamer subsampling, rate limiting) cascaded through multiple + multiplexors converge in expectation to a flat, uniform sample of the + stream directly. 
+ """ + ab = pescador.Streamer(_choice, 'ab') + cd = pescador.Streamer(_choice, 'cd') + ef = pescador.Streamer(_choice, 'ef') + mux1 = pescador.Mux([ab, cd, ef], k=2, rate=2, with_replacement=False, + revive=True, random_state=1357) + + gh = pescador.Streamer(_choice, 'gh') + ij = pescador.Streamer(_choice, 'ij') + kl = pescador.Streamer(_choice, 'kl') + + mux2 = pescador.Mux([gh, ij, kl], k=2, rate=2, with_replacement=False, + revive=True, random_state=2468) + + stacked_mux = pescador.Mux([mux1, mux2], k=2, rate=None, + with_replacement=False, revive=True, + random_state=12345) + + max_iter = 1000 + chars = 'abcdefghijkl' + samples = list(stacked_mux.iterate(max_iter=max_iter)) + counter = collections.Counter(samples) + assert set(chars) == set(counter.keys()) + + counts = np.asarray(list(counter.values())) + + # Check that the pvalue for the chi^2 test is at least 0.95 + test = scipy.stats.chisquare(counts) + assert test.pvalue >= 0.95