-import argparse
-import asc
-import cgi
-import cgitb
-from core import *
-import os
-import sys
-form = cgi.FieldStorage(encoding='utf-8')
-word = None
-w = False
-html = False
-debug = 0
-FILE_PATH = os.path.abspath(os.path.dirname(sys.argv[0]))
-if 'word' in form:
- sys.stdout = Reencoder(sys.stdout)
- print('Content-Type: text/html')
- print('')
- word = form['word'].value
- startd = {}
- endd = {}
- start = []
- end = []
- if 'debug' in form:
- debug = int(form['debug'].value)
- html = True
- class DefaultAppend(argparse.Action):
- def __call__(self, parser, namespace, values, option_string=None):
- items = argparse._copy.copy(
- argparse._ensure_value(namespace, self.dest, []))
- if values == None:
- values = ''
- items.append(values)
- setattr(namespace, self.dest, items)
- parser = argparse.ArgumentParser()
- parser.add_argument('--word', '-w')
- parser.add_argument('--start', '-s', action=DefaultAppend, nargs='?')
- parser.add_argument('--end', '-e', action=DefaultAppend, nargs='?')
- parser.add_argument('--debug', '-d', type=int, default=0)
- parser.add_argument('--html', '-t', action='store_true')
- args = parser.parse_args()
- word = args.word
- if word is None:
- w = True
- debug = args.debug
- if args.html:
- sys.stdout = Reencoder(sys.stdout)
- html = True
- start = args.start
- end = args.end
-if html:
- cgitb.enable()
- cgitb.enable(format='plain')
-for f in form:
- if f[0] == 's':
- startd[f] = form[f].value
- if startd[f] == ' ':
- startd[f] = ''
- elif f[0] == 'e':
- endd[f] = form[f].value
- if endd[f] == ' ':
- endd[f] = ''
-start = start or [v for k, v in sorted(startd.items())]
-end = end or [v for k, v in sorted(endd.items())]
-pairs = list(zip(start, end))
-while True:
- if w:
- try:
- word = input()
- except (KeyboardInterrupt, EOFError):
- break
- word, db = asc.asc(word, pairs, debug, FILE_PATH)
- if html:
- print('
- print(word)
- print(db, end='') # lint:ok
- if html:
- print('
- if not w:
- break
\ No newline at end of file
-from conlang.conlangApp import *
-from core import *
-ar = soundChanger.applyRules
-lf = lambda f, pre='.': loadFile(pre + '/files/' + f)
-def last(name):
- return ('.' + name).rsplit('.', 1)[0][1:]
-def step(start, end):
- '''\
-provides the next step between start and end. does not check if they are
-linearly connected.'''
- if start != '':
- start += '.'
- return start + end[len(start):].split('.')[0]
-def asc(word, pairs, debug=0, pre='.'):
- '''\
-pairs format: [['a', 'a.b'], ['b', 'b.c']]
-debug = 0: don't show anything
-debug = 1: word at end of each pair
-debug = 2: word at end of each file
-debug = 3: word at end of each rule'''
- db = ''
- for p in pairs:
- if p[1].startswith(p[0]):
- cur, end = p
- while cur != end:
- cur = step(cur, end)
- word, steps = ar(word, lf(cur, pre))
- if debug > 2:
- db += steps
- if debug > 1:
- db += cur + ': ' + word + '\n'
- else:
- raise Exception(p[1] + ' does not start with ' + p[0])
- if debug == 1:
- db += p[1] + ': ' + word + '\n'
- return word, db
-saj = 'prt.west.sajura'
\ No newline at end of file
+import argparse
+import cgi
+import cgitb
+import os
+import sys
+from conlang import soundChangeApp, workers
+word = None
+stdin = False
+html = False
+debug = 0
+FILE_PATH = os.path.abspath(os.path.dirname(sys.argv[0]))
+if 'REQUEST_METHOD' in os.environ:
+ # It's being run as a cgi script
+ # Encode all non-ascii characters with xml escapes
+ sys.stdout = workers.Reencoder(sys.stdout)
+ print('Content-Type: text/html')
+ print('')
+ form = cgi.FieldStorage(encoding='utf-8')
+ word = form['word'].value
+ startd = {}
+ endd = {}
+ for f in form:
+ try:
+ # just get the number from the form key
+ n = int(f.split('-')[1])
+ v = form[f].value
+ if f[0] == 's':
+ startd[n] = v if v != ' ' else ''
+ elif f[0] == 'e':
+ endd[n] = v if v != ' ' else ''
+ except IndexError:
+ # there wasn't a '-' in f, so it wasn't a start or end key
+ continue
+ start = [v for k, v in sorted(startd.items())]
+ end = [v for k, v in sorted(endd.items())]
+ if 'debug' in form:
+ debug = int(form['debug'].value)
+ html = True
+ # It's being run from the command line
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--word', '-w')
+ parser.add_argument('--start', '-s', action='append', default=[], nargs='?')
+ parser.add_argument('--end', '-e', action='append', default=[], nargs='?')
+ parser.add_argument('--debug', '-d', type=int, default=0)
+ parser.add_argument('--html', '-t', action='store_true')
+ args = parser.parse_args()
+ word = args.word
+ if word is None:
+ # take input from stdin
+ stdin = True
+ debug = args.debug
+ if args.html:
+ # encode all non-ascii characters with xml escapes
+ sys.stdout = Reencoder(sys.stdout)
+ html = True
+ start = [x if x else '' for x in args.start]
+ end = [x if x else '' for x in args.end]
+if html:
+ # html formatted error messages
+ cgitb.enable()
+ # plain error messages
+ cgitb.enable(format='plain')
+pairs = list(zip(start, end))
+while True:
+ if stdin:
+ try:
+ word = input()
+ except (KeyboardInterrupt, EOFError):
+ break
+ word, db = soundChangeApp.applyRuleFiles(word, pairs, debug)
+ if html:
+ print('')
+ print(word)
+ print(db, end='')
+ if html:
+ print('
+ if not stdin:
+ break
-#! /usr/bin/env python3.3 # lint:ok
-import regex
-from . import workers
-# rules are stored as dicts:
-# {'from': '', 'to': '', 'before': '', 'after': '',
-# 'unbefore': '', 'unafter': ''}
-# all cats would be included in a dict with keys being the cat name,
-# and the values being arrays of the members of that cat
-# e.g. {'nasal':['m', 'n', 'ŋ'], 'vlplos':['p', 't', 'k'], ...}
-def parseSoundChange(l, cats):
- out = {}
- if len(l.split(' = ')) == 2:
- out['catName'] = l.split(' = ')[0]
- category = l.split(' = ')[1]
- for c in cats:
- category = category.replace('{' + c + '}', ' '.join(cats[c]))
- out['category'] = category.split(' ')
- else:
- out['from'] = l.split(' > ')[0]
- if out['from'] == '0':
- out['from'] = ''
- out['from'] = out['from'].replace('#', '\\b')
- out['to'] = l.split(' > ')[1].split(' / ')[0].split(' ! ')[0]
- out['to'].replace('#', '\\b')
- if out['to'] == '0':
- out['to'] = ''
- try:
- out['before'] = l.split(' / ')[1].split('_')[0]
- out['before'] = out['before'].replace('#', '\\b')
- out['after'] = l.split(' / ')[1].split('_')[1].split(' ! ')[0]
- out['after'] = out['after'].replace('#', '\\b')
- except IndexError:
- pass
- try:
- out['unbefore'] = l.split(' ! ')[1].split('_')[0]
- out['unbefore'] = out['unbefore'].replace('#', '\\b')
- out['unafter'] = l.split(' ! ')[1].split('_')[1]
- out['unafter'] = out['unafter'].replace('#', '\\b')
- except IndexError:
- pass
- return out
-def applyRule(word, rule, cats):
- """Applies a specified rule to the given word,\
- making category replacements if necessary"""
- # create match, ignoring numbered categories
- match = '('
- if 'before' in rule and rule['before'] != '':
- match += '(?<=(?P' + rule['before'] + '))'
- if 'unbefore' in rule and rule['unbefore'] != '':
- match += '(?' + rule['from'] + ')'
- if 'after' in rule and rule['after'] != '':
- match += '(?=(?P' + rule['after'] + '))'
- if 'unafter' in rule and rule['unafter'] != '':
- match += '(?!' + rule['unafter'] + ')'
- match += ')'
- ruleStr = match
- # replace categories
- nc = [0]
- def catReplace(m):
- if m.group(2) in cats:
- out = '('
- if m.group(1) == '':
- out += '|'.join(cats[m.group(2)])
- else:
- out += '?P' + '|'.join(cats[m.group(2)])
- nc[0] += 1
- return out + ')'
- else:
- return m.group(0)
- match = regex.sub('\\{(\\d*):?([^}]*)}', catReplace, match)
- def fromToTo(m):
- # now u test the match to see if it matches with numbered categories
- # matched = m.group(1)
- catIndex = []
- if nc[0]:
- iNc = 0
- for c in regex.finditer('\\{(\\d+):([^}]*)}', ruleStr):
- # now what? maybe i should have it search the matched
- # string for this category, find which it is, and determine
- # which index for which numbered category
- cNum = int(c.group(1))
- cCat = c.group(2)
- cNc = 'nc' + str(iNc)
- cMatch = m.group(cNc)
- if cCat in cats:
- if len(catIndex) < cNum and catIndex[cNum] is not None:
- if cats[cCat][catIndex[cNum]] != cMatch:
- return m.group(0)
- # if anything has the wrong number,
- # quit out of this match
- else:
- workers.addPad(catIndex, cNum,
- cats[cCat].index(cMatch))
- iNc += 1
- # now, replace from (m.group('from')) with to
- toStr = rule['to']
- def toCatReplace(c):
- cNum = int(c.group(1))
- cCat = c.group(2)
- if cCat in cats:
- if len(catIndex) > cNum and catIndex[cNum] is not None:
- return cats[cCat][catIndex[cNum]]
- return regex.sub('\\{(\\d+):([^}]*)}', toCatReplace, toStr)
- word = regex.sub(match, fromToTo, word)
- return word
-def applyRules(word, soundChanges, showSteps=False):
- cats = {}
- for l in soundChanges:
- rc = parseSoundChange(l, cats)
- if 'catName' in rc:
- cats[rc['catName']] = rc['category']
- else:
- word = applyRule(word, rc, cats)
- if showSteps:
- print(rc, word)
- return word
\ No newline at end of file
-#! /usr/bin/env python3.3 # lint:ok
-import json
-from . import soundChanger, dictionary, inflections
-def customEncode(obj):
- '''Custom JSON encoder for Dictionary and Entry classes'''
- if isinstance(obj, dictionary.Dictionary):
- key = '__{!s}__'.format(obj.__class__.__name__)
- return {key: list(obj)}
- elif isinstance(obj, dictionary.Entry):
- key = '__{!s}__'.format(obj.__class__.__name__)
- return {key: obj.items()}
- else:
- raise TypeError("obj {} of type {}".format(obj, type(obj)))
-def classHook(dct):
- """JSON object hook to decode classes"""
- if len(dct) == 1:
- className, value = next(iter(dct.items()))
- className = className.strip('_')
- if className == 'Dictionary':
- return dictionary.Dictionary(value)
- elif className == 'Entry':
- return dictionary.Entry(value)
- return dct
-def loadFile(fileName):
- '''Returns specified file as a Dictionary or Entry if it is a json file,
- or a list of lines otherwise'''
- with open(fileName, encoding='utf-8') as f:
- if fileName.split('.')[-1] in ['json', 'js']:
- return json.load(f, object_hook=classHook)
- else:
- return [l.strip('\n') for l in f if len(l.strip('\n')) and l[:1] != '//']
-def saveFile(d, fileName, override=False):
- '''Saves the specified Dictionary to the specifed filename'''
- try:
- f = open(fileName, 'x')
- except FileExistsError:
- if not override:
- print('File exists, overwrite? [Y/n]', end=' ') # lint:ok
- if 'n' in input():
- return
- f = open(fileName, 'w')
- finally:
- json.dump(d, f, default=customEncode)
- f.close()
-def addEntry(d, fields=('word', 'pron', 'pos', 'cl', 'subcl', 'de')):
- '''Prompts user to enter specified fields, or, if none are specified,
- word, pron, pos, cl, subcl, de'''
- e = dictionary.Entry()
- for f in fields:
- print(f, end=': ')
- e[f] = input()
- if e[f] == '':
- del e[f]
- d.append(dictionary.Entry(e))
-def applyRules(d, rc, field='pron', field2=None):
- '''\
-Returns a copy of d with field2 replaced by field with the list of rules
-applied. field defaults to pron, and field2 defaults to whatever field is'''
- out = dictionary.Dictionary()
- for e in d:
- o = dictionary.Entry(e)
- if field2 is None:
- field2 = field
- o[field2] = soundChanger.applyRules(o[field], rc)[0]
- out.append(o)
- return out
-def sortFields(d, fields):
- for i in range(len(d)):
- e = dictionary.Entry()
- for f in fields:
- e[f] = d[i][f]
- d[i] = e
-#! /usr/bin/env python3.3 # lint:ok
import regex
import collections
-from . import soundChanger
+import os
+from . import entryFormat, soundChanger, soundChangeApp
def customEncode(obj):
    """JSON ``default`` hook that serialises Dictionary and Entry objects.

    The object is wrapped in a single-key dict whose key is the class name
    surrounded by double underscores and whose value is the list of
    positional arguments needed to rebuild it. Any other type raises
    TypeError.
    """
    if isinstance(obj, Dictionary):
        payload = [list(obj), obj.alpha, obj.pat, obj.patArgs]
    elif isinstance(obj, Entry):
        payload = [obj.items(), obj.pat, obj.patArgs]
    else:
        raise TypeError("obj {} of type {}".format(obj, type(obj)))
    tag = '__{!s}__'.format(obj.__class__.__name__)
    return {tag: payload}
def classHook(dct):
    """JSON ``object_hook`` that rebuilds Dictionary and Entry objects.

    A single-key dict whose key, stripped of underscores, is 'Dictionary'
    or 'Entry' is replaced by an instance of that class built from the
    unpacked value. Every other dict is returned unchanged.
    """
    if len(dct) != 1:
        return dct
    # exactly one item, so it can be unpacked directly
    (tag, payload), = dct.items()
    kind = tag.strip('_')
    if kind == 'Dictionary':
        return Dictionary(*payload)
    if kind == 'Entry':
        return Entry(*payload)
    return dct
class Dictionary(collections.UserList):
"""A dictionary containing entries in a conlang"""
+ def __init__(self, l=[], alpha=None, pat=None, patArgs={}):
+ self.alpha = alpha
+ self.pat = pat
+ self.patArgs = patArgs
+ super().__init__()
+ for e in l:
+ self.append(Entry(e, pat, patArgs))
+ @classmethod
+ def fromJSON(cls, filename):
+ """Loads a Dictionary from a JSON file."""
+ with open(os.path.expanduser(filename), encoding='utf-8') as f:
+ return json.load(f, object_hook=classHook)
def __add__(self, d):
- return Dictionary(super().__add__(d))
+ return Dictionary(super().__add__(d), self.alpha, self.pat,
+ self.patArgs)
def __getitem__(self, i):
if isinstance(i, slice):
- return Dictionary(super().__getitem__(i))
+ return Dictionary(super().__getitem__(i),
+ self.alpha, self.pat, self.patArgs)
elif isinstance(i, str):
return StringList(self, i)
elif isinstance(i, tuple):
- return Dictionary([e[i] for e in self])
+ return Dictionary([e[i] for e in self], self.alpha, self.pat,
+ self.patArgs)
return super().__getitem__(i)
def __mul__(self, n):
- return Dictionary(super().__mul__(n))
+ return Dictionary(super().__mul__(n), self.alpha, self.pat,
+ self.patArgs)
def __setitem__(self, index, data):
if isinstance(index, str):
@@ -39,37 +80,114 @@ def __setitem__(self, index, data):
super().__setitem__(index, data)
def __str__(self):
- return '\n'.join([str(e) for e in self])
+ return self.formatString()
+ def applyRuleList(self, lines, field1='pron', field2=None):
+ """Applies the list of rules specified by lines to field1 in each entry
+ of the dictionary, and sets field2 of each entry to the result. field1
+ defaults to 'pron', and field2 defaults to whatever field1 is."""
+ if field2 is None:
+ field2 = field1
+ for e in self:
+ # soundChangeApp.applyRuleList returns a tuple of the word and the
+ # debug lines, but we only want the word
+ e[field2] = soundChangeApp.applyRuleList(e[field1], lines)[0]
+ def applyRuleFiles(self, pairs, field1='pron', field2=None):
+ """Applies the specified sound change files specified by pairs (as in
+ soundChangeApp.applyRuleFiles) to field1 of each entry, setting field2
+ the entry to the result. field1 defaults to 'pron', and field2 defaults
+ to whatever field1 is."""
+ if field2 is None:
+ field2 = field1
+ for e in self:
+ # soundChangeApp.applyRuleFiles returns a tuple of the word and the
+ # debug lines, but we only want the word
+ e[field2] = soundChangeApp.applyRuleFiles(e[field1], pairs)[0]
def delete(self, entry):
"""Removes an entry from the dictionary"""
del self[self.index(entry)]
- def formatString(self, fFunct=None):
- if fFunct is None:
- return str(self)
- return '\n'.join([e.formatString(fFunct) for e in self])
+ def formatString(self, pat=None, patArgs={}):
+ if pat is None:
+ pat = self.pat
+ if patArgs == {}:
+ patArgs = self.patArgs
+ return '\n'.join(e.formatString(pat, patArgs) for e in self)
+ def orderFields(self, fields):
+ """Orders the fields of each entry according to fields. If an entry
+ lacks one of the fields, it is set to a value of ''."""
+ for i in range(len(self)):
+ e = Entry()
+ for f in fields:
+ e[f] = self[i].get(f, '')
+ self[i] = e
def search(self, s, field='word', cats=None):
- """Searches the dictionary for the specified string in the specified\
- field and returns all matches. field defaults to 'word'"""
- out = Dictionary()
+ """Searches the dictionary for the specified string in the specified
+ field and returns all matches. field defaults to 'word'"""
+ out = Dictionary(alpha=self.alpha, pat=self.pat, patArgs=self.patArgs)
for e in self:
if e.check(s, field, cats):
return out
def sort(self, field='word', order=None):
- """Returns the dictionary's entries sorted on the specified field,\
- by the ordering function passed, or default string ordering if none is\
+ """Returns the dictionary's entries sorted on the specified field, by
+ the ordering function passed, or default string ordering if none is
passed. field defaults to 'word'"""
- return Dictionary(sorted(self, key=lambda x: x.orderKey(field, order)))
+ if field == 'word' and order is None:
+ order = self.alpha
+ return Dictionary(sorted(self, key=lambda x: x.orderKey(field, order)),
+ self.alpha, self.pat, self.patArgs)
+ def toJSON(self, filename, override=False):
+ """Saves the dictionary to the specified file"""
+ try:
+ f = open(os.path.expanduser(filename), 'x', encoding='utf-8')
+ except FileExistsError:
+ if not override:
+ print('File exists, overwrite? [Y/n]', end=' ')
+ if 'n' in input().lower():
+ return
+ f = open(os.path.expanduser(fileName), 'w', encoding='utf-8')
+ finally:
+ json.dump(self, f, default=customEncode)
+ f.close()
class Entry(collections.UserList):
"""A dictionary entry"""
- def __init__(self, e=[]):
+ def __init__(self, e=[], pat=None, patArgs={}):
+ if pat is None:
+ self.pat = r'$word$pron$pos$cl$de'
+ if patArgs == {}:
+ self.patArgs = {}
+ self.patArgs['pron'] = ' /$pron/'
+ self.patArgs['pos'] = ' - $pos'
+ self.patArgs['cl'] = ' ($cl$subcl)'
+ self.patArgs['subcl'] = '.$subcl'
+ self.patArgs['de'] = ': $de'
+ else:
+ self.patArgs = patArgs
+ else:
+ self.pat = pat
+ self.patArgs = patArgs
+ try:
+ matcher = entryFormat.match(self.pat, self.patArgs)
+ m = matcher.match(e)
+ if m is not None:
+ e = m.groupdict()
+ for f in self.patArgs:
+ if '?P<' + f + '>' not in matcher.pattern:
+ e[f] = self.patArgs[f]
+ else:
+ e = {}
+ except TypeError:
+ pass
except AttributeError:
@@ -78,9 +196,9 @@ def __init__(self, e=[]):
def __add__(self, e):
- return Entry(super().__add__(e.items()))
+ return Entry(super().__add__(e.items()), self.pat, self.patArgs)
except AttributeError:
- return Entry(super().__add__(e))
+ return Entry(super().__add__(e), self.pat, self.patArgs)
def __contains__(self, i):
return i in self.lookup
@@ -95,20 +213,21 @@ def __delitem__(self, i):
def __getitem__(self, i):
if isinstance(i, slice):
- return Entry(super().__getitem__(i))
+ return Entry(super().__getitem__(i), self.pat, self.patArgs)
return super().__getitem__(i)
except TypeError:
if isinstance(i, str):
return self.lookup[i]
- return Entry([(f, self.lookup[f]) for f in i if f in self])
+ return Entry([(f, self.lookup[f]) for f in i if f in self],
+ self.pat, self.patArgs)
def __iter__(self):
return iter(self.keys())
def __repr__(self):
- return repr(self.lookup)
+ return '{' + ', '.join(repr(k) + ': ' + repr(v) for k, v in l) + '}'
def __setitem__(self, i, v):
@@ -122,40 +241,33 @@ def __setitem__(self, i, v):
self.lookup[i] = v
def __str__(self):
- out = ''
- if 'word' in self.lookup:
- out += self.lookup['word']
- if 'pron' in self.lookup and self.lookup['pron']:
- out += '(' + self.lookup['pron'] + ')'
- if 'pos' in self.lookup and self.lookup['pos']:
- out += ' - ' + self.lookup['pos']
- if 'cl' in self.lookup and self.lookup['cl']:
- out += '(' + self.lookup['cl']
- if 'subcl' in self.lookup and self.lookup['subcl']:
- out += '.' + self.lookup['subcl']
- out += ')'
- if 'de' in self.lookup and self.lookup['de']:
- out += ': ' + self.lookup['de']
- return out
+ return self.formatString()
def check(self, s, field, cats=None):
- """Checks to see if the entry contains the specified string\
- in the specified field"""
+ """Checks to see if the entry contains the specified string in the
+ specified field"""
if cats is None:
- return (field in self.lookup) and (s in self.lookup[field])
+ if field in self.lookup and self.lookup[field] is not None:
+ return regex.search(s, self.lookup[field]) is not None
+ else:
+ return False
- m = soundChanger.findMatch(self.lookup[field], s, cats)
+ m = soundChanger.findMatches(self.lookup[field], s, cats)
except TypeError:
- s = soundChanger.parseSoundChange(s, cats)
- m = soundChanger.findMatch(self.lookup[field], s, cats)
+ s = soundChangeApp.parseRule(s, cats)
+ m = soundChanger.findMatches(self.lookup[field], s, cats)
return m[0] is not None
except KeyError:
return False
- def formatString(self, fFunct=__str__):
- return fFunct(self)
+ def formatString(self, pat=None, patArgs={}):
+ if pat is None:
+ pat = self.pat
+ if patArgs == {}:
+ patArgs = self.patArgs
+ return entryFormat.output(self, pat, patArgs)
def get(self, key, default=None):
return self.lookup.get(key, default)
@@ -167,17 +279,20 @@ def keys(self):
return [k for k, v in self.data]
def orderKey(self, field, order=None):
- """Returns a key based on the ordering function passed,\
- or based on default string ordering if none is passed"""
+ """Returns a key based on the ordering function passed, or based on
+ default string ordering if none is passed"""
return order(self.lookup[field])
except TypeError:
- return self.lookup[field]
+ try:
+ return sortKey(order)(self.lookup[field])
+ except AttributeError:
+ return self.lookup[field]
def reorderFields(self, fields):
"""Returns an Entry with the fields reordered in the order specified by
- e = Entry()
+ e = Entry(pat=self.pat, patArgs=self.patArgs)
for f in fields:
e[f] = self.get(f, '')
return e
@@ -207,15 +322,20 @@ def __setitem__(self, i, data):
def sortKey(alpha):
- """Returns a key for sorting in the alphabetical order in list alpha"""
- a = sorted(alpha, key=lambda x: len(x))
+ """Returns a key for sorting in the alphabetical order in dict alpha, which
+ has the format {'a': 1, 'b': 2 ...}. If a character or sequence of
+ characters is to be ignored in alphabetization, it should correspond to a
+ value of None in this dict. Characters not in this dict will be sorted at
+ position -1"""
+ a = sorted(alpha.keys(), key=lambda x: -len(x))
def key(word):
out = []
- for m in regex.finditer('|'.join(a) + '.', word):
- try:
- out.append(alpha.index(m[0]))
- except ValueError:
+ for m in regex.finditer('(' + '|'.join(a) + ')|.', word):
+ if m.group(1):
+ if alpha[m[0]] is not None:
+ out.append(alpha[m[0]])
+ else:
return out
+import regex
+from .workers import sliceReplace
+varMatcher = lambda s: r'(?.*?)'
def match(pat, patArgs):
    """Compiles the entry pattern pat into a regex for parsing entry strings.

    Each patArgs['foo'] template is regex-escaped (keeping '$' literal) and
    every variable reference found by varMatcher('foo') is replaced with
    varGroup('foo'). pat is escaped the same way, wrapped in '^' ... '$',
    and every variable reference found by varMatch is replaced with an
    optional group holding the expanded template for that field, or
    varGroup(field) if the field has no template. Finally each ' ' becomes
    r'\\s+'.
    """
    expanded = {}
    for field in patArgs:
        template = regex.escape(patArgs[field], True).replace(r'\$', '$')
        while True:
            hit = regex.search(varMatcher(field), template)
            if hit is None:
                break
            # the span starts one character early to swallow the '$'
            span = (hit.start(1) - 1, hit.end(1))
            template = sliceReplace(template, span, varGroup(field))
        expanded[field] = template
    pat = '^' + regex.escape(pat, True).replace(r'\$', '$') + '$'
    while True:
        hit = varMatch.search(pat)
        if hit is None:
            break
        span = (hit.start(1) - 1, hit.end(1))
        field = hit.group(1)
        body = expanded.get(field, varGroup(field))
        pat = sliceReplace(pat, span, '(' + body + ')?')
    pat = pat.replace(' ', r'\s+')
    return regex.compile(pat)
def output(entry, pat, patArgs):
    """Renders entry as a string according to pat.

    Each patArgs['foo'] template first has its own variable reference (as
    found by varMatcher('foo')) replaced by entry's value for 'foo', or ''
    if that value is missing or falsy. Then every variable reference in pat
    (as found by varMatch) is replaced: by the expanded template if the
    field is present and not None in entry and has a template, by the raw
    value if it has no template, and by '' if the field is absent or None.
    """
    expanded = {}
    for field in patArgs:
        template = patArgs[field]
        while True:
            hit = regex.search(varMatcher(field), template)
            if hit is None:
                break
            # the span starts one character early to swallow the '$'
            span = (hit.start(1) - 1, hit.end(1))
            template = sliceReplace(template, span, entry.get(field) or '')
        expanded[field] = template
    rendered = pat
    while True:
        hit = varMatch.search(rendered)
        if hit is None:
            break
        span = (hit.start(1) - 1, hit.end(1))
        field = hit.group(1)
        if field in entry and entry[field] is not None:
            filler = expanded.get(field, entry[field])
        else:
            filler = ''
        rendered = sliceReplace(rendered, span, filler)
    return rendered
-+ Searchable
-+ Customizable categories for Entry data
-+ Sortable by custom order
-- Link words to translations (huh?)
-+ Automatic generation of pronunciation from orthography (via sca)
-+ Automatic generation of orthography from pronunciation (via sca)
-+ Sound change applier
-#! /usr/bin/env python3.3 # lint:ok
-class InflectionTable():
- def __init__(self, rows):
- self.rows = rows
- self.data = {}
- numCells = 0
- for r in rows:
- numCells += len(r)
- for i in range(numCells):
- cell = ()
- for r in rows:
- cell += (r[i % len(r)],)
- i //= len(r)
- self.data[cell] = None
- def parse(self, string):
- return tuple(string.split('.'))
- def setCell(self, rc, *cell):
- if tuple(cell) in self.data:
- self.data[tuple(cell)] = rc
- else:
- self.data[self.parse(cell[0])] = rc
- def getCell(self, *cell):
- if tuple(cell) in self.data:
- return self.data[tuple(cell)]
- else:
- return self.data[self.parse(cell[0])]
\ No newline at end of file
+import os
+from . import soundChanger, workers
def parseRule(l, cats):
    """Parses one line of a sound change file into a dict.

    A line of the form 'a = b c d' (exactly one ' = ') is a category
    definition and yields {'catName': 'a', 'category': ['b', 'c', 'd']};
    references such as '{name}' to categories already in cats are expanded
    to their members first.

    Any other line is treated as a rule 'a > b / c_d ! e_f' and yields a
    dict with whichever of 'from', 'to', 'before', 'after', 'unbefore' and
    'unafter' are present. 'from' is always set. A missing ' > ' leaves out
    'to' (useful for search patterns), a missing ' / ' leaves out 'before'
    and 'after', a missing ' ! ' leaves out 'unbefore' and 'unafter', and a
    missing '_' leaves out the corresponding 'after'/'unafter'. The negative
    condition must come after the positive one. A 'from' or 'to' of '0'
    means the empty string. '#' is replaced by a word-boundary regex
    everywhere except in 'to', which is literal replacement text.
    """
    wordBoundary = r'((?<=^|\s)|(?=$|\s))'

    def boundary(s):
        # turn the '#' shorthand into a regex matching at a word edge
        return s.replace('#', wordBoundary)

    out = {}
    definition = l.split(' = ')
    if len(definition) == 2:
        # If there is an equals sign, it's a category
        out['catName'] = definition[0].strip()
        category = definition[1]
        # expand categories
        for c in cats:
            category = category.replace('{' + c + '}', ' '.join(cats[c]))
        out['category'] = category.split()
        return out

    # Otherwise, it's a sound change rule. Split on ' / ' and ' ! ' as well,
    # so a rule without ' > ' can still be used as a search pattern.
    change = l.split(' > ')
    source = change[0].split(' / ')[0].split(' ! ')[0]
    out['from'] = boundary('' if source == '0' else source)
    if len(change) > 1:
        target = change[1].split(' / ')[0].split(' ! ')[0]
        # No '#' substitution here: the old code called .replace() and threw
        # the result away, and a lookaround has no meaning in a replacement.
        out['to'] = '' if target == '0' else target

    positive = l.split(' / ')
    if len(positive) > 1:
        env = positive[1].split('_')
        out['before'] = boundary(env[0].split(' ! ')[0])
        if len(env) > 1:
            out['after'] = boundary(env[1].split(' ! ')[0])

    negative = l.split(' ! ')
    if len(negative) > 1:
        env = negative[1].split('_')
        out['unbefore'] = boundary(env[0])
        if len(env) > 1:
            out['unafter'] = boundary(env[1])
    return out
def applyRuleList(word, lines):
    """Applies the list of rules specified by lines to word.

    Category definitions update the set of known categories; every other
    line is applied to word as a sound change. Returns a tuple of the final
    word and a debug string with one newline-terminated line per input
    line: the category definition itself, or the rule followed by the word
    it produced.
    """
    cats = {}
    trace = []
    for l in lines:
        rc = parseRule(l, cats)
        if 'catName' in rc:
            cats[rc['catName']] = rc['category']
            trace.append(l)
        else:
            word = soundChanger.applyRule(word, rc, cats)
            trace.append(l + ' ' + word)
    # every entry gets its own line; rule entries used to lack the newline
    # and ran into whatever was appended next
    debug = ''.join(entry + '\n' for entry in trace)
    return word, debug
def loadTextFile(filename):
    """Loads a UTF-8 text file as a list of lines.

    Trailing newlines are removed. Blank (whitespace-only) lines and lines
    starting with '//' are skipped. filename is user-expanded, so '~' is
    allowed.
    """
    with open(os.path.expanduser(filename), encoding='utf-8') as f:
        # startswith, not l[:1]: a one-character slice can never equal '//'
        return [l.strip('\n') for l in f
                if l.strip() and not l.startswith('//')]
+lf = lambda f: loadTextFile(workers.FILE_PATH + '/files/' + f)
def step(start, end):
    """Returns start extended by the next dot-separated component of end.

    No check is made that end actually continues start.
    """
    # A non-empty start needs its own separating dot: the component taken
    # from end below is cut at the first '.', so it never contains one.
    prefix = start + '.' if start != '' else start
    component, _, _ = end[len(prefix):].partition('.')
    return prefix + component
def applyRuleFiles(word, pairs, debug=0):
    """Applies chains of sound change files to word.

    pairs format: (('a', 'a.b.c'), ('c', 'c.d')) or (('a', '.b.c'), ('c',
    '.d')), where FILE_PATH/files/a.b, FILE_PATH/files/a.b.c, and
    FILE_PATH/files/c.d each contain a list of sound change rules. An end
    name starting with '.' is relative to its start name. pairs may be any
    sequence of two-item sequences (tuples included) and is not modified.

    debug = 0: don't show anything
    debug = 1: word at end of each pair
    debug = 2: word at end of each file
    debug = 3: word at end of each rule

    Returns a tuple of the final word and the debug info. With no pairs the
    word is returned unchanged with empty debug info. Raises ValueError if
    the end of a pair does not start with its start.
    """
    db = ''
    # if any debug info is to be shown, start by adding the initial word
    if debug and len(pairs) > 0:
        db = pairs[0][0] + ': ' + word + '\n'
    for p in pairs:
        cur, end = p
        if end.startswith('.'):
            # handle relative filenames; resolved into a local so tuple
            # pairs work and the caller's data stays untouched
            end = cur + end
        if not end.startswith(cur):
            # if the members of a pair aren't directly related, it can't do
            # anything
            raise ValueError(end + ' does not start with ' + cur)
        while cur != end:
            cur = step(cur, end)
            word, steps = applyRuleList(word, lf(cur))
            if debug > 2:
                db += steps
            if debug > 1:
                db += cur + ': ' + word + '\n'
        if debug == 1:
            db += end + ': ' + word + '\n'
    return word, db
-#! /usr/bin/env python3.4 # lint:ok
import regex
from . import workers
-# rules are stored as dicts:
-# {'from': '', 'to': '', 'before': '', 'after': '',
-# 'unbefore': '', 'unafter': ''}
# Matches a category reference inside a rule: '{name}' for a plain category or
# '{N:name}' for a numbered one. Group 1 is the number ('' when absent) and
# group 2 is the category name.
catMatcher = regex.compile(r'\{(\d*):?(\w*)\}')
-# all cats would be included in a dict with keys being the cat name,
-# and the values being arrays of the members of that cat
-# e.g. {'nasal':['m', 'n', 'ŋ'], 'vlplos':['p', 't', 'k'], ...}
def catReplace(m, cats):
    """Replaces categories and numbered categories from cats with regex strings
    that will match them appropriately.

    m is a match of a category reference with two groups: the number ('' for
    an unnumbered category) and the category name. cats maps category names
    to lists of members.
    Returns '(a|b|...)' for an unnumbered category, and a group named
    'nc<number>_<category>' for a numbered one, which is the name format
    numberedCategories() splits apart. References to unknown categories are
    returned unchanged."""
    n, c = m.groups()
    if c not in cats:
        return m.group(0)
    alternatives = '|'.join(cats[c])
    if n == '':
        return '(' + alternatives + ')'
    # the group has to be named: a bare '(?P' followed by the alternatives is
    # not a valid regex, and the name is how the capture is found again later
    return '(?P<nc' + n + '_' + c + '>' + alternatives + ')'
-def parseSoundChange(l, cats):
- out = {}
- if len(l.split(' = ')) == 2:
- out['catName'] = l.split(' = ')[0].strip()
- category = l.split(' = ')[1]
- for c in cats:
- category = category.replace('{' + c + '}',
- ' '.join(x if x else '0' for x in cats[c]))
- out['category'] = category.split()
- else:
- try:
- out['from'] = l.split(' > ')[0].split(' / ')[0].split(' ! ')[0]
- if out['from'] == '0':
- out['from'] = ''
- out['from'] = out['from'].replace('#', r'((?<=^|\s)|(?=$|\s))')
- out['to'] = l.split(' > ')[1].split(' / ')[0].split(' ! ')[0]
- out['to'].replace('#', r'((?<=^|\s)|(?=$|\s))')
- if out['to'] == '0':
- out['to'] = ''
- except IndexError:
- out['to'] = ''
- try:
- out['before'] = l.split(' / ')[1].split('_')[0]
- out['before'] = out['before'].replace('#', r'((?<=^|\s)|(?=$|\s))')
- out['after'] = l.split(' / ')[1].split('_')[1].split(' ! ')[0]
- out['after'] = out['after'].replace('#', r'((?<=^|\s)|(?=$|\s))')
- except IndexError:
- pass
- try:
- out['unbefore'] = l.split(' ! ')[1].split('_')[0]
- out['unbefore'] = out['unbefore'].replace('#', r'((?<=^|\s)|(?=$|\s))')
- out['unafter'] = l.split(' ! ')[1].split('_')[1]
- out['unafter'] = out['unafter'].replace('#', r'((?<=^|\s)|(?=$|\s))')
- except IndexError:
- pass
- return out
-def checkNumCats(m, ruleStr, cats):
- # now u test the match to see if it matches with numbered categories
- # matched = m.group(1)
- catIndex = []
- iNc = 0
- for c in regex.finditer(r'\{(\d+):([^}]*)}', ruleStr):
- # now what? maybe i should have it search the matched
- # string for this category, find which it is, and determine
- # which index for which numbered category
- cNum = int(c.group(1))
- cCat = c.group(2)
- cNc = 'nc' + str(iNc)
- cMatch = m.group(cNc)
- if cCat in cats:
- if len(catIndex) > cNum and catIndex[cNum] is not None:
- if cats[cCat][catIndex[cNum]] != cMatch:
- return None
- # if anything has the wrong number,
- # quit out of this match
- else:
- workers.addPad(catIndex, cNum,
- cats[cCat].index(cMatch))
- iNc += 1
- return catIndex
def toCatReplace(m, cats, indices):
    """Replaces numbered categories in a match with the appropriate element of
    a category.

    m is a match of a category reference with two groups: the number and the
    category name. indices maps 'nc<number>' to the member index chosen for
    that number, as produced by numberedCategories().
    Unnumbered references and references to unknown categories are returned
    unchanged. Raises KeyError if the number has no entry in indices."""
    n, c = m.groups()
    if c not in cats or n == '':
        return m.group(0)
    member = cats[c][indices['nc' + n]]
    # treat '0' as '', to allow for categories with gaps
    return '' if member == '0' else member
def findMatches(word, rule, cats):
    """Finds all matches of rule in word.

    rule is a dict with a 'from' entry and optional 'before', 'after',
    'unbefore' and 'unafter' entries, each a regex fragment that may contain
    category references. cats maps category names to lists of members.
    Returns a tuple (matches, catIndex): the match objects whose numbered
    categories are consistent, and, in the same order, the dict returned by
    numberedCategories() for each of them."""
    # First, generate a regex pattern to search for. The context parts are
    # lookarounds, so the span of a match covers only the 'from' part.
    pattern = ''
    if 'before' in rule and rule['before'] != '':
        pattern += '(?<=(?P<before>' + rule['before'] + '))'
    if 'unbefore' in rule and rule['unbefore'] != '':
        pattern += '(?<!' + rule['unbefore'] + ')'
    pattern += '(?P<from>' + rule['from'] + ')'
    if 'after' in rule and rule['after'] != '':
        pattern += '(?=(?P<after>' + rule['after'] + '))'
    if 'unafter' in rule and rule['unafter'] != '':
        pattern += '(?!' + rule['unafter'] + ')'
    # Next, replace all the categories in the pattern with real regex
    pattern = catMatcher.sub(lambda m: catReplace(m, cats), pattern)
    # Now pattern is a valid regex
    matches = []
    catIndex = []
    for m in regex.finditer(pattern, word):
        # For each match, check that the numbered categories match, and
        # populate catIndex with the indices associated with each one
        try:
            indices = numberedCategories(m, cats)
        except ValueError:
            # The numbered categories didn't match
            continue
        matches.append(m)
        catIndex.append(indices)
    # Each match in matches is a valid match, since those that had mismatching
    # numbered categories were never added to it.
    return matches, catIndex
def numberedCategories(m, cats):
    """Returns a dict of the indices associated with each numbered category, of
    the form {'nc0': 1, 'nc1': 2, ...}

    m must provide capturesdict(), mapping group names to lists of captured
    strings. Only groups named 'nc<number>_<category>' are examined. cats
    maps category names to lists of members.
    Raises ValueError if the numbered categories of the match are
    inconsistent with each other."""
    indices = {}
    for name, captured in m.capturesdict().items():
        if not name.startswith('nc') or not captured:
            # not a numbered category, or the group took no part in the match
            # and so puts no constraint on the index
            continue
        n, c = name.split('_', 1)
        if len(set(captured)) > 1:
            # the same number/category pair matched multiple strings
            raise ValueError(name + ' matched more than one string')
        members = cats[c]
        if n in indices:
            i = indices[n]
            # an index beyond the end of this category is a mismatch too
            if i >= len(members) or members[i] != captured[0]:
                # a previous number/category pair had a different index
                raise ValueError(name + ' does not agree with ' + n)
        else:
            indices[n] = members.index(captured[0])
    return indices
def applyRule(word, rule, cats):
    """Applies the sound change specified by rule to word.

    rule is a dict as accepted by findMatches(), with an additional 'to'
    entry giving the replacement, which may contain numbered category
    references. cats maps category names to lists of members.
    Returns the changed word, or word itself if the rule does not match."""
    matches, catIndex = findMatches(word, rule, cats)
    if not matches:
        # there were no matches, so no changes need to be applied
        return word
    # starting from the end of the string, replace each match with the output
    # of the rule, so that the spans of the earlier matches stay valid
    for match, indices in zip(reversed(matches), reversed(catIndex)):
        # produce the appropriate replacement, given numbered categories, and
        # insert it into the word
        replacement = catMatcher.sub(
            lambda m: toCatReplace(m, cats, indices), rule['to'])
        word = workers.sliceReplace(word, match.span(), replacement)
    return word
-def applyRules(word, soundChanges):
- cats = {}
- debug = ''
- try:
- for l in soundChanges:
- rc = parseSoundChange(l, cats)
- if 'catName' in rc:
- cats[rc['catName']] = rc['category']
- else:
- word = applyRule(word, rc, cats)
- debug += l + ' ' + word + '\n'
- except TypeError:
- for w in range(len(word)):
- word[w] = applyRules(word[w], soundChanges)
- return word, debug
-def getCats(soundChanges):
- cats = {}
- for l in soundChanges:
- rc = parseSoundChange(l, cats)
- if 'catName' in rc:
- cats[rc['catName']] = rc['category']
- return cats
\ No newline at end of file
-from soundChanger import applyRule
-cats = {'V': ['u', 'z', 'i', 'ɯ', 'a'],
- 'F': ['f', 's', 'ç', 'x', 'ħ']}
-rules = [{'from': '{0:F}', 'to': '{0:V}', 'before': '{F}', 'after': '{F}'},
- {'from': '{0:V}', 'to': '{0:F}', 'before': '{V}', 'after': '{V}'}]
-word = input()
-#word = 'ħsfuza'
-for rule in rules:
- print(rule)
- word = applyRule(word, rule, cats)
- print(word)
-#! /usr/bin/env python3.3 # lint:ok
+import cgitb
+import sys
+import os
# Hard-coded directory of the deployed sound changer.
# NOTE(review): rule files are loaded from workers.FILE_PATH + '/files/' -
# presumably this constant; confirm before changing the value.
FILE_PATH = '/mit/sashacf/web_scripts/soundchanger'
def addPad(l, n, item):
- '''adds item to list l at position n, padding l with the value None, if
- necessary'''
+ """Adds item to list l at position n, padding l with the value None, if
+ necessary"""
if n >= len(l):
l += [None] * (n + 1 - len(l))
if l[n] is None:
@@ -11,14 +14,27 @@ def addPad(l, n, item):
def flipDict(d):
    """Returns a dict with values and keys reversed.

    The values of d must be hashable. If several keys share a value, only one
    of them survives as that value's entry."""
    return {v: k for k, v in d.items()}
-class LoopBreak(Exception):
- pass
def reencode(s):
    """Returns s with every non-ASCII character replaced by its XML numeric
    character reference, e.g. '&#331;'."""
    return s.encode('ascii', 'xmlcharrefreplace').decode()


class Reencoder():
    """A stream that uses 'xmlcharrefreplace' to reencode its output before
    writing it to the wrapped stream."""

    def __init__(self, stream=sys.__stdout__):
        # stream only needs write() and flush()
        self.stream = stream

    def write(self, *a):
        """Reencodes each argument and passes them on to the wrapped stream's
        write(), returning its result."""
        return self.stream.write(*(reencode(s) for s in a))

    def flush(self):
        """Flushes the wrapped stream."""
        return self.stream.flush()
def sliceReplace(word, sl, repl):
    """Returns word with the slice indicated by sl replaced with repl.

    sl is a (start, end) pair of indices into word, such as the value of a
    match's span()."""
    return word[:sl[0]] + repl + word[sl[1]:]
-import cgitb
-import sys
-import asc
-FILE_PATH = '/mit/sashacf/web_scripts/soundchanger'
-class Reencoder():
- def __init__(self, stream=sys.__stdout__):
- self.stream = stream
- def write(self, *a):
- return self.stream.write(*(reencode(s) for s in a))
- def flush(self):
- return self.stream.flush()
-reencode = lambda s: s.encode('ascii', 'xmlcharrefreplace').decode()
-if __name__ == '__main__':
- print('Content-Type: text/html')
- print('')
- print(reencode(asc.asc('sˈezuza sˈewa', [('prt.west.sajura', 'prt.west.sajura.purrub.middle.em.modern.orth')], 0, '/mit/sashacf/web_scripts/soundchanger')[0]))
\ No newline at end of file
