-
Notifications
You must be signed in to change notification settings - Fork 0
/
OriginalAssociationRules.py
105 lines (81 loc) · 3.04 KB
/
OriginalAssociationRules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import random
import datetime
# Number of distinct items; items are the integers 0..N-1.
N = 25 # no. of attributes
# Minimum support: an itemset is frequent when it occurs in at least
# MINSUP * (number of lines in the file) lines.
MINSUP = 0.15
def createfile(m, filename):
    """Write m random itemsets to filename, one itemset per line.

    Each line lists the distinct items of one itemset in ascending order,
    every item followed by a single space.  Items are integers in 0..N-1.
    An itemset always has at least one item, because at least one draw is
    made and the first draw is always kept.

    Args:
        m: number of itemsets (lines) to write.
        filename: path of the file to write; it is opened in "w" mode, so
            any existing content is replaced.
    """
    # The with-block closes the file even if a write fails part-way through.
    with open(filename, "w") as f:
        for _ in range(m):
            itemset = []
            # Between 1 and N draws; repeated values are dropped, so the
            # itemset can be shorter than the number of draws.
            for _ in range(random.randrange(N) + 1):
                item = random.randrange(N)  # random integer 0..N-1
                if item not in itemset:
                    itemset.append(item)
            itemset.sort()
            f.write("".join(str(item) + " " for item in itemset) + "\n")
def is_in(smallitemset, bigitemset):
    """Return True iff every item of smallitemset occurs in bigitemset.

    Both arguments must be sorted in ascending order; the check is a single
    forward pass over bigitemset.

    Args:
        smallitemset: sorted list of items to look for.
        bigitemset: sorted list of items to search in.

    Returns:
        True when all items of smallitemset were found, otherwise False.
    """
    # One shared iterator: each wanted item resumes the scan where the
    # previous one stopped, so bigitemset is walked at most once.
    remaining = iter(bigitemset)
    for wanted in smallitemset:
        for candidate in remaining:
            if wanted > candidate:
                # candidate is too small to match; keep scanning
                continue
            if wanted < candidate:
                # the scan has passed the place where wanted would be
                return False
            # equal: wanted is matched, move on to the next wanted item
            break
        else:
            # bigitemset ran out before wanted was matched
            return False
    return True
def frequent_itemsets(filename, itemsets):
    """Return the itemsets from `itemsets` that are frequent in the file.

    Every line of the file is read as one itemset of whitespace-separated
    integers.  The lines are expected to be in ascending order, because the
    containment check (is_in) works on sorted lists.  A candidate is
    frequent when the number of lines containing it is at least
    MINSUP * (number of lines in the file).

    Args:
        filename: path of the file of itemsets to scan.
        itemsets: list of candidate itemsets, each a sorted list of ints.

    Returns:
        A new list with the frequent candidates (the same list objects that
        were passed in), in their original order.  The list is empty when
        the file contains no lines.

    Raises:
        ValueError: if a token in the file is not an integer.
    """
    count = [0] * len(itemsets)  # count[i] = lines that contain itemsets[i]
    filelength = 0  # no. of itemsets (lines) in the file, used for the support
    # The with-block closes the file even if parsing a line raises.
    with open(filename, "r") as f:
        for line in f:
            filelength += 1
            transaction = [int(token) for token in line.split()]
            for i, itemset in enumerate(itemsets):
                if is_in(itemset, transaction):
                    count[i] += 1
    if filelength == 0:
        # With no lines the threshold below would be 0 and every candidate
        # would pass, so callers that grow itemsets level by level would
        # enumerate every subset of the items.  Nothing is frequent in an
        # empty file.
        return []
    threshold = MINSUP * filelength
    return [
        itemset
        for itemset, hits in zip(itemsets, count)
        if hits >= threshold
    ]
def create_kplus1_itemsets(kitemsets, filename):
    """Build (k+1)-itemset candidates from kitemsets and keep the frequent ones.

    Two k-itemsets are joined when they agree on everything except their
    last item: the last item of the later one is appended to the earlier
    one.  Because the scan for partners stops at the first mismatch,
    itemsets sharing a prefix must be adjacent in kitemsets.

    Args:
        kitemsets: list of k-itemsets, each a sorted list.
        filename: path of the file of itemsets used to test the candidates.

    Returns:
        The candidates that frequent_itemsets reports as frequent.
    """
    candidates = []
    total = len(kitemsets)
    for i in range(total - 1):
        head = kitemsets[i]
        stem = head[:-1]  # head without its last item
        for j in range(i + 1, total):
            partner = kitemsets[j]
            if partner[:-1] != stem:
                # first itemset with a different prefix ends this group
                break
            candidates.append(head + [partner[-1]])
    # checks which of the k+1 itemsets are frequent
    return frequent_itemsets(filename, candidates)
def create_1itemsets(filename):
    """Return the frequent single-item itemsets of the file.

    Builds one candidate [i] for every item i in 0..N-1 and returns those
    that frequent_itemsets reports as frequent in `filename`.
    """
    singletons = [[item] for item in range(N)]
    return frequent_itemsets(filename, singletons)
def minsup_itemsets(filename):
    """Return all frequent itemsets of the file, smallest sizes first.

    Starts with the frequent 1-itemsets and repeatedly derives the frequent
    itemsets that are one item larger, until a level comes back empty.

    Args:
        filename: path of the file of itemsets to mine.

    Returns:
        A list with the frequent itemsets of every level, in the order the
        levels were produced.
    """
    kitemsets = create_1itemsets(filename)
    # Copy so the accumulated result is a separate list from the first
    # level instead of an alias that gets extended in place.
    minsupsets = list(kitemsets)
    while kitemsets:
        kitemsets = create_kplus1_itemsets(kitemsets, filename)
        minsupsets.extend(kitemsets)
    return minsupsets
#t1 = datetime.datetime.now()
#print(minsup_itemsets("itemsets.txt"))
#t2 = datetime.datetime.now()
#print("False",t2-t1)