Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests for Mux statistics #100

Merged
merged 5 commits into from
Aug 21, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 57 additions & 22 deletions tests/test_mux.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import pytest

import collections
import itertools
import numpy as np
import random
import scipy.stats

import pescador
import pescador.mux
Expand Down Expand Up @@ -162,7 +161,6 @@ def test_mux_of_muxes_itered():
random_state=135)
samples1 = mux1.iterate(max_iter=1000)
count1 = collections.Counter(samples1)
print(count1)
assert set('abcxyz') == set(count1.keys())

n123 = pescador.Streamer('123')
Expand All @@ -172,7 +170,6 @@ def test_mux_of_muxes_itered():
random_state=246)
samples2 = mux2.iterate(max_iter=1000)
count2 = collections.Counter(samples2)
print(count2)
assert set('123456') == set(count2.keys())

# Note that (random_state=987, k=2) fails.
Expand All @@ -181,7 +178,6 @@ def test_mux_of_muxes_itered():
random_state=987)
samples3 = mux3.iterate(max_iter=1000)
count3 = collections.Counter(samples3)
print(count3)
assert set('abcxyz123456') == set(count3.keys())


Expand All @@ -204,26 +200,37 @@ def test_mux_of_muxes_single():
prune_empty_streams=False)
samples3 = list(mux3.iterate(max_iter=10000))
count3 = collections.Counter(samples3)
print(samples3[:10], count3)
assert set('abcxyz123456') == set(count3.keys())


def test_critical_mux():
# Check on Issue #80
chars = 'abcde'
streamers = [pescador.Streamer(x * 5) for x in chars]
n_reps = 5
streamers = [pescador.Streamer(x * n_reps) for x in chars]
mux = pescador.Mux(streamers, k=len(chars), rate=None,
with_replacement=False, revive=True,
with_replacement=False, revive=False,
prune_empty_streams=False, random_state=135)
samples = mux.iterate(max_iter=1000)
print(collections.Counter(samples))
samples = list(mux.iterate(max_iter=1000))
assert len(collections.Counter(samples)) == len(chars)
assert len(samples) == len(chars) * n_reps


def _choice(vals, seed=11111):
    """Yield an endless, reproducible stream of random picks from `vals`.

    A private ``np.random.RandomState`` seeded with `seed` drives the
    selection, so two generators built with the same arguments emit the
    same sequence regardless of any global random state.

    `vals` must support ``len()`` and integer indexing (e.g. a str or
    list). Each item yielded is ``vals[i]`` for an index ``i`` drawn by
    ``randint(0, len(vals))``.
    """
    sampler = np.random.RandomState(seed=seed)
    upper = len(vals)
    while True:
        # Draw the index first, then look the element up.
        index = sampler.randint(0, upper)
        yield vals[index]


def _cycle(values):
    """Yield the items of `values` in order, restarting after each pass.

    A re-iterable container (str, list, tuple, ...) that is non-empty is
    replayed indefinitely, so the consumer is responsible for bounding
    the number of items it pulls.

    The generator ends as soon as a complete pass over `values` produces
    nothing. That happens when `values` is empty, or when it is a
    one-shot iterator that has already been consumed. Without this guard
    the outer loop would spin forever without yielding, hanging whatever
    is pulling from the stream.
    """
    while True:
        produced = False
        for v in values:
            produced = True
            yield v
        if not produced:
            # Nothing left to replay: end the stream instead of busy-looping.
            return


def test_critical_mux_of_rate_limited_muxes():
# Check on Issue #79
def _choice(vals):
while True:
yield random.choice(vals)

ab = pescador.Streamer(_choice, 'ab')
cd = pescador.Streamer(_choice, 'cd')
Expand All @@ -244,7 +251,6 @@ def _choice(vals):
count = collections.Counter(samples)
max_count, min_count = max(count.values()), min(count.values())
assert (max_count - min_count) / max_count < 0.2
print(count)
assert set('abcdefghijkl') == set(count.keys())


Expand All @@ -258,11 +264,6 @@ def test_restart_mux():

def test_sampled_mux_of_muxes():

def _cycle(values):
while True:
for v in values:
yield v

# Build some sample streams
ab = pescador.Streamer(_cycle, 'ab')
cd = pescador.Streamer(_cycle, 'cd')
Expand All @@ -273,7 +274,7 @@ def _cycle(values):
# And inspect the first mux
samples1 = list(mux1(max_iter=6 * 10))
count1 = collections.Counter(samples1)
print(count1)

assert set(count1.keys()) == set('abcdef')

# Build another set of streams
Expand All @@ -286,15 +287,13 @@ def _cycle(values):
# And inspect the second mux
samples2 = list(mux2(max_iter=6 * 10))
count2 = collections.Counter(samples2)
print(count2)
assert set(count2.keys()) == set('ghijkl')

# Merge the muxes together.
mux3 = pescador.Mux([mux1, mux2], k=2, rate=None,
with_replacement=False, revive=False)
samples3 = list(mux3.iterate(max_iter=10000))
count3 = collections.Counter(samples3)
print(count3)
assert set('abcdefghijkl') == set(count3.keys())
max_count, min_count = max(count3.values()), min(count3.values())
assert (max_count - min_count) / max_count < 0.2
Expand All @@ -310,3 +309,39 @@ def test_mux_inf_loop():
with_replacement=False, random_state=1234)

assert len(list(mux(max_iter=100))) == 0


def test_mux_stacked_uniform_convergence():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A 1-sentence summary docstring here might be nice. I like these for more complex tests (like this one).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

"""This test is designed to check that bootstrapped streams of data
(Streamer subsampling, rate limiting) cascaded through multiple
multiplexors converge in expectation to a flat, uniform sample of the
stream directly.
"""
ab = pescador.Streamer(_choice, 'ab')
cd = pescador.Streamer(_choice, 'cd')
ef = pescador.Streamer(_choice, 'ef')
mux1 = pescador.Mux([ab, cd, ef], k=2, rate=2, with_replacement=False,
revive=True, random_state=1357)

gh = pescador.Streamer(_choice, 'gh')
ij = pescador.Streamer(_choice, 'ij')
kl = pescador.Streamer(_choice, 'kl')

mux2 = pescador.Mux([gh, ij, kl], k=2, rate=2, with_replacement=False,
revive=True, random_state=2468)

stacked_mux = pescador.Mux([mux1, mux2], k=2, rate=None,
with_replacement=False, revive=True,
random_state=12345)

max_iter = 1000
chars = 'abcdefghijkl'
samples = list(stacked_mux.iterate(max_iter=max_iter))
counter = collections.Counter(samples)
assert set(chars) == set(counter.keys())

counts = np.asarray(list(counter.values()))

# Check that the pvalue for the chi^2 test is at least 0.95
test = scipy.stats.chisquare(counts)
assert test.pvalue >= 0.95