forked from usnistgov/SP800-90B_EntropyAssessment
-
Notifications
You must be signed in to change notification settings - Fork 0
/
markov.py
executable file
·64 lines (56 loc) · 2.25 KB
/
markov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import math
# Non-iid Markov test - DRAFT NIST SP 800-90B (2016) Section 6.3.3
#
# NOTE: this software is made available with no guarantee - implied or otherwise -
# of correctness or completeness. See user guide for full disclaimer.
#
# T. A. Hall
# CSD/ITL/NIST
# 25 February 2013
#
# Updated by Kerry McKay
# March 3, 2016
def markov_test(s, k, alpha):
    """Estimate min-entropy of a non-IID source via the Markov test.

    Implements the Markov estimate of draft NIST SP 800-90B (2016),
    Section 6.3.3: model the sample as a first-order Markov chain of
    assumed length d=128, overestimate the initial-state and transition
    probabilities at the adjusted confidence level, then take the
    probability of the single most likely d-step state sequence.

    s     -- sample sequence; each element is a symbol in range(k)
    k     -- alphabet size (number of distinct symbols)
    alpha -- base confidence level in (0, 1)

    Returns (pmax, min_entropy) where pmax is the probability of the most
    likely length-d sequence and min_entropy = -log2(pmax) / d.
    """
    # Step 1: tighten the confidence level to min(alpha^(k^2), alpha^d).
    # Since alpha < 1, the smaller power is the one with the larger
    # exponent, hence max(k*k, d).
    d = 128
    alpha = math.pow(alpha, max(k * k, d))

    # Step 2: overestimated initial distribution, P_i = min(1, o_i/L + eps).
    L = len(s)
    occurrences = [s.count(sym) for sym in range(k)]
    eps_term = math.log(1.0 / (1.0 - alpha), 2)
    eps = math.sqrt(eps_term / (2 * L))
    P = [min(1.0, occ / float(L) + eps) for occ in occurrences]

    # The final symbol has no successor, so it must not contribute to the
    # transition proportions: remove it from its own occurrence count.
    occurrences[s[-1]] -= 1

    # Step 3: overestimated transition matrix T.
    # Count observed transitions by walking consecutive pairs of s.
    transitions = [[0] * k for _ in range(k)]
    for prev, cur in zip(s, s[1:]):
        transitions[prev][cur] += 1

    # NOTE(review): the inherited comment says T_ij should be 1 when
    # o_i == 0 (maximal overestimate per the draft spec), but this code
    # has always produced 0 for such rows; behavior preserved here —
    # confirm against the published spec text.
    T = []
    for i in range(k):
        n_i = occurrences[i]
        if n_i == 0:
            T.append([0] * k)
        else:
            eps_i = math.sqrt(eps_term / (2.0 * n_i))
            T.append([min(1.0, transitions[i][j] / float(n_i) + eps_i)
                      for j in range(k)])

    # Step 4: probability of the most likely d-step sequence, computed as
    # d-1 max-product (Viterbi-style) updates of the state vector P.
    for _ in range(d - 1):
        P = [max(P[i] * T[i][c] for i in range(k)) for c in range(k)]
    pmax = max(P)

    # Step 5: min-entropy per symbol of the assumed length-d chain.
    min_entropy = -math.log(pmax, 2.0) / d
    return pmax, min_entropy