-
Notifications
You must be signed in to change notification settings - Fork 197
/
Copy pathAlphabet.java
138 lines (118 loc) · 5 KB
/
Alphabet.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/*************************************************************************
* Compilation: javac Alphabet.java
* Execution: java Alphabet
*
* A data type for alphabets, for use with string-processing code
* that must convert between an alphabet of size R and the integers
* 0 through R-1.
*
* Warning: supports only the basic multilingual plane (BMP), i.e,
* Unicode characters between U+0000 and U+FFFF.
*
*************************************************************************/
public class Alphabet {
public static final Alphabet BINARY = new Alphabet("01");
public static final Alphabet OCTAL = new Alphabet("01234567");
public static final Alphabet DECIMAL = new Alphabet("0123456789");
public static final Alphabet HEXADECIMAL = new Alphabet("0123456789ABCDEF");
public static final Alphabet DNA = new Alphabet("ACTG");
public static final Alphabet LOWERCASE = new Alphabet("abcdefghijklmnopqrstuvwxyz");
public static final Alphabet UPPERCASE = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
public static final Alphabet PROTEIN = new Alphabet("ACDEFGHIKLMNPQRSTVWY");
public static final Alphabet BASE64 = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
public static final Alphabet ASCII = new Alphabet(128);
public static final Alphabet EXTENDED_ASCII = new Alphabet(256);
public static final Alphabet UNICODE16 = new Alphabet(65536);
private char[] alphabet; // the characters in the alphabet
private int[] inverse; // indices
private int R; // the radix of the alphabet
// Create a new Alphabet from sequence of characters in string.
public Alphabet(String alpha) {
// check that alphabet contains no duplicate chars
boolean[] unicode = new boolean[Character.MAX_VALUE];
for (int i = 0; i < alpha.length(); i++) {
char c = alpha.charAt(i);
if (unicode[c])
throw new IllegalArgumentException("Illegal alphabet: repeated character = '" + c + "'");
unicode[c] = true;
}
alphabet = alpha.toCharArray();
R = alpha.length();
inverse = new int[Character.MAX_VALUE];
for (int i = 0; i < inverse.length; i++)
inverse[i] = -1;
// can't use char since R can be as big as 65,536
for (int c = 0; c < R; c++)
inverse[alphabet[c]] = c;
}
// Create a new Alphabet of Unicode chars 0 to R-1
private Alphabet(int R) {
alphabet = new char[R];
inverse = new int[R];
this.R = R;
// can't use char since R can be as big as 65,536
for (int i = 0; i < R; i++)
alphabet[i] = (char) i;
for (int i = 0; i < R; i++)
inverse[i] = i;
}
// Create a new Alphabet of Unicode chars 0 to 255 (extended ASCII)
public Alphabet() {
this(256);
}
// is character c in the alphabet?
public boolean contains(char c) {
return inverse[c] != -1;
}
// return radix R
public int R() {
return R;
}
// return number of bits to represent an index
public int lgR() {
int lgR = 0;
for (int t = R-1; t >= 1; t /= 2)
lgR++;
return lgR;
}
// convert c to index between 0 and R-1.
public int toIndex(char c) {
if (c < 0 || c >= inverse.length || inverse[c] == -1) {
throw new IllegalArgumentException("Character " + c + " not in alphabet");
}
return inverse[c];
}
// convert String s over this alphabet into a base-R integer
public int[] toIndices(String s) {
char[] source = s.toCharArray();
int[] target = new int[s.length()];
for (int i = 0; i < source.length; i++)
target[i] = toIndex(source[i]);
return target;
}
// convert an index between 0 and R-1 into a char over this alphabet
public char toChar(int index) {
if (index < 0 || index >= R) {
throw new IndexOutOfBoundsException("Alphabet index out of bounds");
}
return alphabet[index];
}
// Convert base-R integer into a String over this alphabet
public String toChars(int[] indices) {
StringBuilder s = new StringBuilder(indices.length);
for (int i = 0; i < indices.length; i++)
s.append(toChar(indices[i]));
return s.toString();
}
public static void main(String[] args) {
int[] encoded1 = Alphabet.BASE64.toIndices("NowIsTheTimeForAllGoodMen");
String decoded1 = Alphabet.BASE64.toChars(encoded1);
StdOut.println(decoded1);
int[] encoded2 = Alphabet.DNA.toIndices("AACGAACGGTTTACCCCG");
String decoded2 = Alphabet.DNA.toChars(encoded2);
StdOut.println(decoded2);
int[] encoded3 = Alphabet.DECIMAL.toIndices("01234567890123456789");
String decoded3 = Alphabet.DECIMAL.toChars(encoded3);
StdOut.println(decoded3);
}
}