-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathcompute_util.py
63 lines (42 loc) · 1.27 KB
/
compute_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.spatial.distance import cosine
import math
def compute_sim_matrix(txtVecs):
    """Build the symmetric pairwise similarity matrix for a list of vectors.

    Every unordered pair ``(i, j)`` with ``i < j`` is scored once with
    ``compute_sim_value`` and the score is mirrored into ``[j][i]``.
    Diagonal cells are never written, so they keep their initial value ``0``.

    Args:
        txtVecs: Sized, indexable collection of vectors.

    Returns:
        A ``len(txtVecs)`` x ``len(txtVecs)`` list of lists.
    """
    size = len(txtVecs)
    matrix = [[0] * size for _ in range(size)]
    for row in range(size):
        # Only the upper triangle is computed; the lower one is a mirror.
        for col in range(row + 1, size):
            score = compute_sim_value(txtVecs[row], txtVecs[col])
            matrix[row][col] = score
            matrix[col][row] = score
    return matrix
def compute_sim_value(vecarr1, vecarr2):
    """Return the cosine similarity of two vectors.

    ``cosine`` from SciPy yields a distance, so the similarity is its
    complement, ``1 - distance``.

    Args:
        vecarr1: First vector.
        vecarr2: Second vector.

    Returns:
        ``1`` minus the cosine distance between the two vectors.
    """
    distance = cosine(vecarr1, vecarr2)
    return 1 - distance
def compute_mean_sd(numbers):
    """Return the mean and population standard deviation of ``numbers``.

    Args:
        numbers: Sized collection of numeric values.

    Returns:
        A two-item list ``[mean, sd]``. The deviation divides the summed
        squared differences by ``len(numbers)`` (population form), not by
        ``len(numbers) - 1``. An empty input yields ``[0, 0]``.
    """
    count = len(numbers)
    if count == 0:
        # Nothing to average; return zeros rather than dividing by zero.
        return [0, 0]
    mean_val = sum(numbers) / count
    squared_dev_sum = sum(
        (num - mean_val) * (num - mean_val) for num in numbers
    )
    return [mean_val, math.sqrt(squared_dev_sum / count)]
def MultiplyTwoSetsOneToOne(set1, set2):
    """Multiply two equal-length sequences element by element.

    Args:
        set1: First sequence of factors.
        set2: Second sequence of factors.

    Returns:
        A new list whose i-th item is ``set1[i] * set2[i]`` when both inputs
        have the same length. On a length mismatch the two lengths are
        printed and ``set1`` itself is returned unchanged.
    """
    if len(set1) != len(set2):
        # The lengths must be converted with str(): concatenating the raw
        # ints to the message raised TypeError before anything was printed.
        print("len_set1=" + str(len(set1)) + ",len_set2=" + str(len(set2)))
        return set1
    return [left * right for left, right in zip(set1, set2)]
def compute_row_sim_I(txtVec, txtVecs):
    """Score one vector against every vector in a collection.

    Args:
        txtVec: The reference vector.
        txtVecs: Sized, indexable collection of vectors to compare against.

    Returns:
        A list with one entry per item of ``txtVecs``, in order, where entry
        ``i`` is ``compute_sim_value(txtVecs[i], txtVec)``.
    """
    return [
        compute_sim_value(txtVecs[idx], txtVec)
        for idx in range(len(txtVecs))
    ]