Skip to content

Commit

Permalink
refactored basicface regex
Browse files Browse the repository at this point in the history
(tested via string.equals on the old one)
  • Loading branch information
brendano committed Oct 22, 2012
1 parent 595339e commit d0988f8
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/cmu/arktweetnlp/Twokenize.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,14 @@ public static String OR(String... parts) {
// @aliciakeys Put it in a love song :-))
// @hellocalyclops =))=))=)) Oh well

// NOTE(review): this listing is a rendered commit diff, so the old and the new
// declaration of basicface both appear below; presumably only the second one
// (built from s1..s4) remains in the actual file -- confirm against the repository.

// Earlier form: the whole pattern written as a single concatenated literal.
// The s1..s4 concatenation further down produces the identical string.
static String basicface = "(?:(?i)(♥|0|o|°|v|\\$|t|x|\\.|;|\\u0CA0|@|ʘ|•|・|◕|\\^|¬|\\*)(?:[\\.]|[_-]+)\\2)|(?:--['\"])"+
"|(?:<|&lt;|>|&gt;)[\\._-]+(?:<|&lt;|>|&gt;)";
// s1: capturing group listing the characters accepted as the first character
// of a face. "\\u0CA0" reaches the regex engine as the escape \u0CA0.
static String s1 = "(♥|0|o|°|v|\\$|t|x|\\.|;|\\u0CA0|@|ʘ|•|・|◕|\\^|¬|\\*)";
// s2: the middle of the face -- a single '.' or a run of '_' / '-' -- followed
// by the backreference \2.
// NOTE(review): s1 is the only capturing group inside basicface itself, so \2
// presumably relies on the group numbering of the larger pattern this fragment
// is embedded in -- confirm which group it is meant to repeat.
static String s2 = "(?:[\\.]|[_-]+)\\2";
// s3: "--" followed by a single or a double quote.
static String s3 = "(?:--['\"])";
// s4: an angle bracket (literal or written as &lt; / &gt;), one or more of
// '.', '_' or '-', then another angle bracket.
static String s4 = "(?:<|&lt;|>|&gt;)[\\._-]+(?:<|&lt;|>|&gt;)";
// Refactored form: the (?i) flag sits inside the first non-capturing group,
// which wraps s1+s2; s3 and s4 are appended as further top-level alternatives.
static String basicface = "(?:(?i)" +s1+s2+ ")|" +s3+ "|" + s4;

static String eastEmote = "[\\\\\ƪԄ\\((<>;ヽ\\-=~\\*]+(?:"+basicface+"|[^A-Za-z0-9\\s\\(\\)\\*:=-])+[\\-=\\);'\\u0022<>ʃ)//ノノ丿╯σっµ~\\*]+";

public static String emoticon = OR(
// Standard version :) :( :] :D :P
"(?:>|&gt;)?" + OR(normalEyes, wink) + OR(noseArea,"[Oo]") +
Expand Down

0 comments on commit d0988f8

Please sign in to comment.