diff --git a/check_tokens.py b/check_tokens.py index 6e1da6e..2a1ca0d 100644 --- a/check_tokens.py +++ b/check_tokens.py @@ -39,7 +39,7 @@ tokens.append(x) #remove all punctuations -tokens = [re.sub("[\>\<\/\#\”\“\'\`\(\)\:\;\!\?\"\,\s\.\[,\]]+", "", w) for w in tokens] +tokens = [re.sub("[\>\<\/\#\”\“\'\`\(\)\:\;\!\?\"\,\s\.\[\]]+", "", w) for w in tokens] #open MALINDO Morph and make a list of all surface forms m = codecs.open('/home/david/MALINDO_Morph/malindo_dic_20180817.tsv', encoding='utf-8', mode='r')