Skip to content

Commit

Permalink
Merge pull request #100 from pescadores/ejh_20170706_mux_stats
Browse files Browse the repository at this point in the history
Tests for Mux statistics
bmcfee authored Aug 21, 2017

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents f56d23f + abb4bbb commit df01c72
Showing 1 changed file with 57 additions and 22 deletions.
79 changes: 57 additions & 22 deletions tests/test_mux.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import pytest

import collections
import itertools
import numpy as np
import random
import scipy.stats

import pescador
import pescador.mux
@@ -162,7 +161,6 @@ def test_mux_of_muxes_itered():
random_state=135)
samples1 = mux1.iterate(max_iter=1000)
count1 = collections.Counter(samples1)
print(count1)
assert set('abcxyz') == set(count1.keys())

n123 = pescador.Streamer('123')
@@ -172,7 +170,6 @@ def test_mux_of_muxes_itered():
random_state=246)
samples2 = mux2.iterate(max_iter=1000)
count2 = collections.Counter(samples2)
print(count2)
assert set('123456') == set(count2.keys())

# Note that (random_state=987, k=2) fails.
@@ -181,7 +178,6 @@ def test_mux_of_muxes_itered():
random_state=987)
samples3 = mux3.iterate(max_iter=1000)
count3 = collections.Counter(samples3)
print(count3)
assert set('abcxyz123456') == set(count3.keys())


@@ -204,26 +200,37 @@ def test_mux_of_muxes_single():
prune_empty_streams=False)
samples3 = list(mux3.iterate(max_iter=10000))
count3 = collections.Counter(samples3)
print(samples3[:10], count3)
assert set('abcxyz123456') == set(count3.keys())


def test_critical_mux():
    """Regression check for Issue #80.

    One finite streamer is built per character in ``chars``, each over that
    character repeated ``n_reps`` times.  They are multiplexed with ``k``
    equal to the number of streamers, without replacement and without
    revival, and the mux is iterated with a ``max_iter`` well above the
    total number of available samples.

    The test asserts that every character shows up and that the number of
    samples drawn is exactly ``len(chars) * n_reps``.
    """
    # Check on Issue #80
    chars = 'abcde'
    n_reps = 5
    streamers = [pescador.Streamer(x * n_reps) for x in chars]
    mux = pescador.Mux(streamers, k=len(chars), rate=None,
                       with_replacement=False, revive=False,
                       prune_empty_streams=False, random_state=135)

    # Materialise the samples so they can be both counted and measured.
    samples = list(mux.iterate(max_iter=1000))
    assert len(collections.Counter(samples)) == len(chars)
    assert len(samples) == len(chars) * n_reps


def _choice(vals, seed=11111):
    """Endlessly yield pseudo-randomly selected items of ``vals``.

    Each generator owns a ``np.random.RandomState`` seeded with ``seed``,
    so two generators built with the same arguments yield the same
    sequence.

    Parameters
    ----------
    vals : sequence
        Indexable collection to draw from.  Its length is read once, when
        the generator is first advanced.
    seed : int
        Seed for the generator's own random state.

    Yields
    ------
    item
        ``vals[i]`` for an index ``i`` drawn with ``randint(0, len(vals))``.
    """
    sampler = np.random.RandomState(seed=seed)
    upper = len(vals)
    while True:
        index = sampler.randint(0, upper)
        yield vals[index]


def _cycle(values):
    """Yield the items of ``values`` in order, over and over, forever.

    ``values`` is re-iterated from the start each time its end is reached,
    so it should be a re-iterable collection (e.g. a string or list).
    """
    while True:
        for item in iter(values):
            yield item


def test_critical_mux_of_rate_limited_muxes():
# Check on Issue #79
def _choice(vals):
while True:
yield random.choice(vals)

ab = pescador.Streamer(_choice, 'ab')
cd = pescador.Streamer(_choice, 'cd')
@@ -244,7 +251,6 @@ def _choice(vals):
count = collections.Counter(samples)
max_count, min_count = max(count.values()), min(count.values())
assert (max_count - min_count) / max_count < 0.2
print(count)
assert set('abcdefghijkl') == set(count.keys())


@@ -258,11 +264,6 @@ def test_restart_mux():

def test_sampled_mux_of_muxes():

def _cycle(values):
while True:
for v in values:
yield v

# Build some sample streams
ab = pescador.Streamer(_cycle, 'ab')
cd = pescador.Streamer(_cycle, 'cd')
@@ -273,7 +274,7 @@ def _cycle(values):
# And inspect the first mux
samples1 = list(mux1(max_iter=6 * 10))
count1 = collections.Counter(samples1)
print(count1)

assert set(count1.keys()) == set('abcdef')

# Build another set of streams
@@ -286,15 +287,13 @@ def _cycle(values):
# And inspect the second mux
samples2 = list(mux2(max_iter=6 * 10))
count2 = collections.Counter(samples2)
print(count2)
assert set(count2.keys()) == set('ghijkl')

# Merge the muxes together.
mux3 = pescador.Mux([mux1, mux2], k=2, rate=None,
with_replacement=False, revive=False)
samples3 = list(mux3.iterate(max_iter=10000))
count3 = collections.Counter(samples3)
print(count3)
assert set('abcdefghijkl') == set(count3.keys())
max_count, min_count = max(count3.values()), min(count3.values())
assert (max_count - min_count) / max_count < 0.2
@@ -310,3 +309,39 @@ def test_mux_inf_loop():
with_replacement=False, random_state=1234)

assert len(list(mux(max_iter=100))) == 0


def test_mux_stacked_uniform_convergence():
    """Check that stacked, rate-limited muxes sample their symbols evenly.

    Bootstrapped streams of data (Streamer subsampling, rate limiting)
    cascaded through multiple multiplexers should converge in expectation
    to a flat, uniform sample of the underlying stream.

    Two muxes (``k=2``, ``rate=2``, no replacement, ``revive=True``) each
    draw from three two-character ``_choice`` streams; a third mux with
    ``rate=None`` stacks the two.  After 1000 samples every one of the
    twelve characters must have appeared, and a chi-square test on the
    per-character counts must return a p-value of at least 0.95.
    """
    alphabet = 'abcdefghijkl'
    n_samples = 1000

    # First bank of random two-symbol streams and its rate-limited mux.
    lower_bank = [pescador.Streamer(_choice, pair)
                  for pair in ('ab', 'cd', 'ef')]
    mux1 = pescador.Mux(lower_bank, k=2, rate=2, with_replacement=False,
                        revive=True, random_state=1357)

    # Second bank, same configuration with a different seed.
    upper_bank = [pescador.Streamer(_choice, pair)
                  for pair in ('gh', 'ij', 'kl')]
    mux2 = pescador.Mux(upper_bank, k=2, rate=2, with_replacement=False,
                        revive=True, random_state=2468)

    # Stack the two muxes under a third one with no rate limit.
    stacked_mux = pescador.Mux([mux1, mux2], k=2, rate=None,
                               with_replacement=False, revive=True,
                               random_state=12345)

    drawn = list(stacked_mux.iterate(max_iter=n_samples))
    tally = collections.Counter(drawn)
    assert set(alphabet) == set(tally.keys())

    # Check that the p-value for the chi^2 test is at least 0.95
    counts = np.asarray(list(tally.values()))
    result = scipy.stats.chisquare(counts)
    assert result.pvalue >= 0.95

0 comments on commit df01c72

Please sign in to comment.