-
Notifications
You must be signed in to change notification settings - Fork 9
/
hvutil.py
453 lines (408 loc) · 17.6 KB
/
hvutil.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# name really sais it all: a grab-bag of potentially useful stuff
# $Id: hvutil.py,v 1.16 2015-09-23 12:25:48 jive_cc Exp $
# $Log: hvutil.py,v $
# Revision 1.16 2015-09-23 12:25:48 jive_cc
# HV: * add method 'split_optarg' for simple-minded "command line parsing".
# Splits its "*args" (all strings) in two parts: arguments and options
# where options are defined to be strings matching 'key=value'.
# It provides the possibility to specify a check/convert utility to
# transform <value> to a desired type.
#
# Revision 1.15 2015-09-21 11:36:22 jive_cc
# HV: * The big push. The 'command.py' interface long left something
# to be desired if a command specified an 'args()' method to transform
# the line of input into multiple arguments and call the callback function
# with those arguments [hint: it wouldn't].
# Now the command.py object calls the callback as:
# "cmd.callback( *cmd.args(input) )"
# * all commands have been visited and modified to be specified as
# "def xyz( .., *args)"
# and do the right thing if called with wrong number of arguments.
# Many command's regex's got simpler.
# * fixed some error messages to be more readable
# * extended the documentation of x, y scaling of plots ("xyscale")
# * clear error message if adressing an invalid panel in a plot
# * the 'new plot' specification was widened: can now use comma-separated
# list of axes to set the new plot value in one go: "new p,sb true"
# * the "cd" command now does '~' expansion at the beginning of the path
# * fixed bug in pgplot device name generation in case details were left out
#
# Revision 1.14 2014-04-23 14:16:39 jive_cc
# HV: * added generalized version of "quote_split" which allows splitting
# based on a separator char unless it is within a quoted section where
# the open/close markers are arguments to the function
#
# Revision 1.13 2014-04-15 12:21:56 jive_cc
# HV: * pagelabel is now centralized computed in the base class
# we now have maximum consistency between all plots
#
# Revision 1.12 2014-04-08 22:41:10 jive_cc
# HV: Finally! This might be release 0.1!
# * python based plot iteration now has tolerable speed
# (need to test on 8M row MS though)
# * added quite a few plot types, simplified plotters
# (plotiterators need a round of moving common functionality
# into base class)
# * added generic X/Y plotter
#
# Revision 1.11 2014-04-02 17:55:29 jive_cc
# HV: * another savegame, this time with basic plotiteration done in Python
#
# Revision 1.10 2013-12-12 14:10:15 jive_cc
# HV: * another savegame. Now going with pythonic based plotiterator,
# built around ms2util.reducems
#
# Revision 1.9 2013-09-03 17:34:30 jive_cc
# HV: * Amazing! All plot types do work now. Can save plots to file.
# Still some details left to do obviously ...
#
# Revision 1.8 2013-06-19 12:28:43 jive_cc
# HV: * making another savegame
#
# Revision 1.7 2013-04-02 16:35:21 jive_cc
# HV: * quote_split() moved to command.py (presumably mostly useful there)
# * added cycle_detection algorithm to find cycles inna graph
#
# Revision 1.6 2013-03-31 17:17:56 jive_cc
# HV: * another savegame
#
# Revision 1.5 2013-03-09 16:59:07 jive_cc
# HV: * another savegame
#
# Revision 1.4 2013-02-19 16:53:29 jive_cc
# HV: * About time to commit - make sure all edits are safeguarded.
# Making good progress. baselineselection, sourceselection and
# timeselection working
#
# Revision 1.3 2013-02-11 09:40:33 jive_cc
# HV: * saving work done so far
# - almost all mapping-functionality is in place
# - some of the API functions are starting to appear
#
# Revision 1.2 2013-01-29 12:23:44 jive_cc
# HV: * time to commit - added some more basic stuff
#
from __future__ import print_function
from functional import map_, filter_, zip_, enumerate_, range_, is_not_none, drap
from six import iteritems
from functools import reduce, partial
import itertools, operator, re, string, copy, math, datetime
## Partition a list into two lists - one with the elements satisfying the predicate
## and one with the elements who don't
def partition(pred, lst):
y, n = (list(), list())
for elem in lst:
y.append(elem) if pred(elem) else n.append(elem)
return (y, n)
# map a function over all the values in the dict
# {k:v} => {k:f(v)}
def dictmap(f, d):
return dict((k, f((k,v))) for (k,v) in iteritems(d))
# reduce a dict to one value:
# acc = f( (k1, v1), f( (k0,v0), acc0 ) ) etc
# "f" is called as: f(elem, acc)
# and should return the new accumulator
def dictfold(f, a, d):
for x in iteritems(d):
a = f(x, a)
return a
def mkerrf(pfx):
def actualerrf(msg):
raise RuntimeError("{0} {1}".format(pfx, msg))
return actualerrf
## Return the minimum + maximum of a sequence in one go.
## Note: the sequence must have at least 1 element
def minmax(seq):
mi,ma = (None, None)
for item in seq:
mi = min(mi, item)
ma = max(ma, item)
return (mi, ma)
#return reduce(lambda (mi,ma) ,y : (min(mi,y), max(ma,y)), seq[1:], (seq[0], seq[0])) if len(seq) else (None, None)
## Round floating point number to n decimal places
def round(n):
factor = 10**n
return lambda x: math.floor( x * factor )/factor
## Determines if a is a subset of b
def contains(a, b):
return set(a).issubset(set(b))
## enumerate the items in iterable and
## yield the slice of [first, last]
def enumerateslice(iterable, first, last):
return itertools.takewhile(lambda i_v: i_v[0]<last, itertools.dropwhile(lambda i_v: i_v[0]<first, enumerate(iterable)))
## Convert fractional day into datetime.time
def fractionalDayToTime(frac):
ofrac = frac
(frac, h) = math.modf(frac*24)
(frac, m) = math.modf(frac*60)
(frac, s) = math.modf(frac*60)
return datetime.time(h, m, s, int(frac*1.0e6))
## Convert time in seconds-since-start-of-day into datetime.time
def secondsInDayToTime(sssod):
# 3600 seconds per hour
(h, s) = divmod(sssod, 3600)
(m, s) = divmod(s, 60)
(fs, s) = math.modf( s )
return datetime.time(int(h), int(m), int(s), int(fs*1.0e6))
## Generalized version of 'quote_split' as found in command.py
## This one defaults to "'" for opening and "'" for closing quotes
## but these characters can be overridden
##
## quote_split do not extract _words_ but split the
## string at the indicated character,
## provided the character is not inside
## quotes
##
## "aap 'noot 1; mies 2'; foo bar"
## results in:
## ["aap 'noot 1;mies 2'", "foo bar"]
def quote_split(s, splitchar=';', quotes="'"):
q = list(quotes)
if len(q)==1:
q.append( q[0] )
if len(q)!=2:
raise RuntimeError("Quotes must be an array of two length")
rv = [""]
# switch quotiness if we see quotes[switch]
switch = 0
inquote = False
for i in range_(len(s)):
if not inquote and s[i]==splitchar:
rv.append( "" )
continue
rv[-1] = rv[-1] + s[i]
if s[i]==q[switch]:
inquote = not inquote
switch = 1 - switch
return rv
## itemgetter: differs wrt operator.itemgetter (standard Python version) in that
## this one _always_ yields a tuple, no matter how many arguments were given.
## The built-in itemgetter does, depending on the amount of arguments, the following:
##
## #of args to "operator.itemgetter()" action
## 0 Crash! ('list indices must must be integers, not list')
## 1 return an object
## 2 or more return a tuple with the addressed objects
##
## This version:
##
## n return a tuple with the n addressed objects
## irrespective of n==0, n==1,... n==42, ...
itemgetter = lambda *args: lambda thing: tuple([thing[x] for x in args])
attrgetter = lambda *args: lambda thing: tuple([getattr(thing, x) for x in args])
## Finds consecutive ranges in the sequence
## returns a list of (start, end) values
## of ranges start ... end (the end is inclusive)
## of which the sequence was made of
## Based on
## http://stackoverflow.com/questions/2361945/detecting-consecutive-integers-in-a-list
def find_consecutive_ranges(seq):
# groupby() returns [(key, group)]
# where group = iterable sequence
# of [(idx, item)] (because that's
# what we gave to the groupby() -
# we enumerated our original items!
# so after the groupby phase we
# first extract all the groups (we don't care
# about the actual grouping key)
# by mapping the itemgetter over the list of groups
# then we're left with an iterable which contains iterable
# sequences of which we need to find the first and last item
# Apparently there can be instances where the groupby returns
# an empty list so we need to take care of that and
# filter those non-results out
def maker( key_seq ):
# undo the enumeration we added to allow
# identification of consecutive elements
l = map_(operator.itemgetter(1), key_seq[1])
return (l[0], l[-1]) if l else None
return filter_(is_not_none,
map(maker,
itertools.groupby(enumerate(sorted(seq)), lambda i_x:i_x[0]-i_x[1])))
## After having found consecutive ranges, you might want a string representation
## [(start, end), ...] =>
## "<start>" (if start==end)
## "<start>-<end>" otherwise
def range_repr(l, rchar=":"):
return ",".join(map(lambda s_e: "{0}".format(s_e[0]) if s_e[0]==s_e[1] else "{0}{2}{1}".format(s_e[0],s_e[1],rchar) if abs(s_e[0]-s_e[1])>1 else "{0},{1}".format(s_e[0],s_e[1]), l))
## Does the inverse of the previous one:
## expands string "1:10,13,20:22" into [1,2,3,4,5,6,7,8,9,10,13,20,21,22]
## and "5:2" into [5,4,3,2]
##
## Supports arbitrary increments
## 1:10:2 => [1,3,5,7,9]
## Support arithmetic:
## "2*10:3*10,12-2:12+2"
## All expressions will be converted to int after evaluating
def expand_string_range(s, rchar=":"):
rxNum = re.compile(r"^\d+$")
rxRng = re.compile(r"^(?P<s>[-\d\.+*/%()]+)"+rchar+"(?P<e>[-\d\.+*/%()]+)(:(?P<step>[-+]?\d+))?$")
def count_from_to(s,e,step):
while abs(s-e)>=abs(step):
#print "s:{0} e:{1} diff:{2}".format(s, e, abs(s-e))
yield s
s = s + step
if abs(s-e)<=abs(step):
yield s
return
def mkcounter(item):
mo = rxRng.match(item)
if mo:
# start, end may be expressions
(s,e) = (int(eval(mo.group('s'))), int(eval(mo.group('e'))))
defstep = 1 if (s<e) else -1
step = mo.group('step')
step = int(step) if step else defstep
# Test if we actually *can* count from start -> end using step:
# e.g.: 1 -> 10, step -1 isn't going to end very well is it?
# -1 -> -10, step 1 ,, ,,
# Step size "0" is ONLY allowed if start==end!
# Also assure ourselves that the step direction and counting
# direction are identical
if (not step and (s-e)) or (((e-s) * step )<0):
raise RuntimeError("cannot count from {0} to {1} with step {2}".format(s, e, step))
return count_from_to(s, e, step)
else:
mo = rxNum.match(str(eval(item)))
if not mo:
raise ValueError("{0} is not a number! (It's a free man!)".format(item))
# 'item' may be an expression!
item = int(eval(item))
# Note: seems superfluous to return a counter for 1 number but now
# we have a list of iterables which can easily be transformed into
# one list via itertools.chain() (see below)
return count_from_to(item, item, 1)
return list(itertools.chain(*[mkcounter(x) for x in s.split(",")]))
##
## String processing utilities
##
## take a list of (pattern, replacement) tuples and run them
## over the string to produce the final edited string
subber = lambda acc, pat_repl: re.sub(pat_repl[0], pat_repl[1], acc)
sub = lambda txt, lst: reduce(subber, lst, txt)
# Return a predicate function which will keep returning True
# until n occurrences of the character 'c' have been seen.
# Can be used e.g. conjunction with itertools.takewhile()
def untilnth(n, char):
# we need to creat an in-place modifyable 'thing'
# orelse a "n=n-1" in the match() inner fn
# will give an UnboundLocalError exception:
# http://eli.thegreenplace.net/2011/05/15/understanding-unboundlocalerror-in-python/
o = type('',(),{"n":n})
def match(c):
if c==char:
o.n = o.n - 1
if o.n<=0:
return False
return True
return match
## return the leading part of the string up until the n'th
## occurrence of 'char'
## This is 'better' than s.split(sep=char)[0:n]
## because you will have lost the actual sep characters!
## (could do "char.join(s.split(sep=char)[0:n])" but then
## you're introducing elements in the string yourself ...
## (you don't know now many "split()" has taken out!
def strcut(s, n, char):
return ''.join(itertools.takewhile(untilnth(n, char), s))
## escape split - split a string at whitespace,
## honouring the escape character (single quote)
## aap 'noot mies' is OK
## aap 'no\'ot mies' is OK
##
## Note: the quoted string MUST be surrounded by whitespace:
## aap no'oot mies' is NOT OK
## aap 'noot''mies' is NOT OK
#rxSplit = re.compile(r"('.*?(?<!\\)'|\S+)")
rxSplit = re.compile(r"('.*?(?<!\\)'(?=\s|$)|\S+)")
rxUnescapedQuote = re.compile(r"(?<!\\)'")
stripEscapeChars = partial(re.sub, r"\\", r"")
def escape_split(s):
# three stage process:
# (1) find all quoted string + whitespace separated words
# (2) detect errors - if there are non-escaped quote(s) in
# a word dat's an error
# (3) strip all escape sequences
# (1) find a list of strings and strip the leading+trailing quote
# if any
words = map_(lambda x: x[1:-1] if x[0]=="'" else x, rxSplit.findall(s))
# (2) detect syntax errors like "n'oot" "aap 'noot''mies'"
if any(map(rxUnescapedQuote.search, words)):
raise SyntaxError("quoted string error - either no whitespace between " \
"quoted elements or an unescaped quote in the middle " \
"of a word")
# (3) strip the escape character(s)
return map_(stripEscapeChars, words)
## Flatten a list of lists
## http://grokbase.com/t/python/tutor/051c5rgj5s/flattening-a-list
def flatten(a):
"""Flatten a list."""
return bounce(flatten_k(a, lambda x: x))
def bounce(thing):
"""Bounce the 'thing' until it stops being a callable."""
while callable(thing):
thing = thing()
return thing
def flatten_k(a, k):
"""CPS/trampolined version of the flatten function. The original
function, before the CPS transform, looked like this:
def flatten(a):
if not isinstance(a,(tuple,list)): return [a]
if len(a)==0: return []
return flatten(a[0])+flatten(a[1:])
The following code is not meant for human consumption.
"""
if not isinstance(a,(tuple,list)):
return lambda: k([a])
if len(a)==0:
return lambda: k([])
def k1(v1):
def k2(v2):
return lambda: k(v1 + v2)
return lambda: flatten_k(a[1:], k2)
return lambda: flatten_k(a[0], k1)
## cycle_detect: Return a list of all cycles in the given graph
## Uses depth-first search and a stack of 'seen' nodes to detect
## if we cross a node we've seen before.
## graph = { "n":["n1","n2"], "m":["n3","n4"], ....}
# helper function: the caching depth-first-search
def dfs_seen(node, graph, seen, cycles):
if node in seen:
cycles.add(frozenset(seen[seen.index(node):]))
else:
seen.append(node)
if node in graph:
# drain + map() - we not interested in the return value of the map()
drap(lambda x: dfs_seen(x, graph, copy.deepcopy(seen), cycles), graph[node])
return cycles
def cycle_detect(graph):
# for each starting point, start with a fresh 'seen' and collect all cycles
# IF there is a cycle with 'n' nodes in it, we would end up with
# 'n' entries of the same "seen" set, therefore our accumulation is a set:
# it will automatically weed out the duplicates
return list(reduce(lambda acc, k: dfs_seen(k, graph, list(), acc), graph, set()))
##
## Simple-minded 'command (line)' parsing
## by separating list of arguments into those matching "<key>=<value>" from
## those that don't. For those that do match this pattern, the match-object
## containing two groups - 'key' and 'value' - is returned.
##
## "split_optarg(*args)" => (arguments, options)
##
## arguments = list of strings, elements in *args that did not match 'k=v'
## options = dict of {key:value} pairs parsed from *args
##
## Note that one or both lists could be empty, depending on the contents of *args
rxKeyValue = re.compile(r"^(?P<key>[^=]+)=(?P<value>.+)$")
identity = lambda x: x
def mo_or_org(x, **convdict):
mo = rxKeyValue.match(x)
return (mo.group('key'), convdict.get(mo.group('key'), identity)(mo.group('value'))) if mo else x
def split_optarg(*args, **convdict):
(a, o) = partition(lambda x: isinstance(x, str), map(lambda y: mo_or_org(y, **convdict), args))
return (a, dict(o))
# Simple+slow uniq/unique function that retains order
# cf. https://stackoverflow.com/a/37163210
def uniq(l):
used = set()
return [x for x in l if x not in used and (used.add(x) or True)]