-
Notifications
You must be signed in to change notification settings - Fork 88
/
Copy pathJaccardSimilarityCoefficient.java
78 lines (67 loc) · 2.34 KB
/
JaccardSimilarityCoefficient.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
package cn.codepub.algorithms.strings;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
import java.util.HashSet;
import java.util.Set;
/**
 * <p>
 * Created with IntelliJ IDEA. 2015/8/9 14:22
 * </p>
 * <p>
 * ClassName:JaccardSimilarityCoefficient
 * </p>
 * <p>
 * Description: The Jaccard similarity coefficient is a measure of how similar two sets are:
 * J(A,B) = |A∩B| / |A∪B|.
 * The Jaccard distance measures how distinct two sets are, as the proportion of elements that
 * are not shared among all elements: Ja(A,B) = 1 - J(A,B).
 * </p>
 * <p>
 * Both methods of this class treat a string as the set of its distinct Unicode code points, so a
 * character outside the Basic Multilingual Plane counts as one element rather than as two
 * surrogate halves.
 * </p>
 *
 * @author Wang Xu
 * @version V1.0.0
 * @since V1.0.0
 */
public class JaccardSimilarityCoefficient {

    /**
     * Computes the Jaccard similarity coefficient of the character sets of two strings.
     *
     * @param str1 the first string; may be {@code null} only if {@code str2} is {@code null} or empty
     * @param str2 the second string; may be {@code null} only if {@code str1} is {@code null} or empty
     * @return a value in {@code [0, 1]}; {@code 1} when both arguments are {@code null} or empty
     * @throws NullPointerException if exactly one argument is {@code null} and the other is non-empty
     */
    public double getJSC(String str1, String str2) {
        boolean firstBlank = str1 == null || str1.isEmpty();
        boolean secondBlank = str2 == null || str2.isEmpty();
        if (firstBlank && secondBlank) {
            // Two empty sets are considered identical; this also avoids dividing 0 by 0.
            return 1;
        }
        // Past this point a null argument is a caller error, checked in argument order.
        if (str1 == null) {
            throw new NullPointerException("str1");
        }
        if (str2 == null) {
            throw new NullPointerException("str2");
        }
        return similarity(codePointSet(str1), codePointSet(str2));
    }

    /**
     * Static variant of {@link #getJSC(String, String)} that does not accept {@code null}.
     * This method writes nothing to standard output.
     *
     * @param s0 the first string, must not be {@code null}
     * @param s1 the second string, must not be {@code null}
     * @return a value in {@code [0, 1]}; {@code 1.0} when both strings are empty
     * @throws NullPointerException if either argument is {@code null}
     */
    public static double calcBySets(String s0, String s1) {
        if (s0.isEmpty() && s1.isEmpty()) {
            return 1.0;
        }
        return similarity(codePointSet(s0), codePointSet(s1));
    }

    /** Collects the distinct Unicode code points of {@code text}. */
    private static Set<Integer> codePointSet(String text) {
        Set<Integer> points = new HashSet<Integer>();
        int index = 0;
        while (index < text.length()) {
            int codePoint = text.codePointAt(index);
            points.add(codePoint);
            // Advance by 2 for a surrogate pair, by 1 otherwise (including unpaired surrogates).
            index += Character.charCount(codePoint);
        }
        return points;
    }

    /** Returns |a ∩ b| / |a ∪ b|; callers guarantee that at least one set is non-empty. */
    private static double similarity(Set<Integer> a, Set<Integer> b) {
        // Probe the larger set with the elements of the smaller one.
        Set<Integer> smaller = a.size() <= b.size() ? a : b;
        Set<Integer> larger = smaller == a ? b : a;
        int shared = 0;
        for (Integer codePoint : smaller) {
            if (larger.contains(codePoint)) {
                shared++;
            }
        }
        // Inclusion-exclusion gives the union size without materialising the union.
        int unionSize = a.size() + b.size() - shared;
        return (double) shared / unionSize;
    }
}