forked from luci/luci-py
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathisolate_format.py
663 lines (555 loc) · 21.8 KB
/
isolate_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
# Copyright 2014 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.
"""Contains logic to parse .isolate files.
This module doesn't touch the file system. It's the job of the client code to do
I/O on behalf of this module.
See more information at
https://github.com/luci/luci-py/tree/master/appengine/isolate/doc/client
https://github.com/luci/luci-py/blob/master/appengine/isolate/doc/Design.md#isolated-file-format
"""
import ast
import itertools
import logging
import os
import posixpath
import re
import sys
from utils import fs
# Regular expression fragment matching a valid variable name. It is embedded
# in the '<(NAME)' patterns built by replace_variable() and eval_variables().
VALID_VARIABLE = '[A-Za-z_][A-Za-z_0-9]*'
class IsolateError(ValueError):
  """Generic failure while loading or processing a .isolate file."""
def determine_root_dir(relative_root, infiles):
  """Finds the deepest root directory that is indirectly referenced by infiles.

  Every leading '..' component of an entry climbs one directory above
  |relative_root|; the highest directory reached this way is returned.

  All arguments must be using os.path.sep.
  """
  parent_prefix = '..' + os.path.sep
  deepest_root = relative_root
  for path in infiles:
    candidate = relative_root
    # Count "how far" back up this entry is looking.
    while path.startswith(parent_prefix):
      path = path[len(parent_prefix):]
      assert not path.startswith(os.path.sep)
      candidate = os.path.dirname(candidate)
    # Only keep the candidate when it is a prefix of the current answer.
    if deepest_root.startswith(candidate):
      deepest_root = candidate
  logging.info(
      'determine_root_dir(%s, %d files) -> %s',
      relative_root, len(infiles), deepest_root)
  return deepest_root
def replace_variable(part, variables):
  """Returns the value of the variable if |part| starts with '<(NAME)'.

  Arguments:
    part: a fragment of a string, as produced by the split in
          eval_variables().
    variables: dict mapping variable names to their values.

  Returns:
    str(variables[NAME]) when |part| starts with a '<(NAME)' reference,
    otherwise |part| unchanged.

  Raises:
    IsolateError if the referenced variable is not present in |variables|.
  """
  # Both halves of the pattern are raw strings so '\(' and '\)' reach the regex
  # engine verbatim instead of relying on an invalid string escape sequence.
  m = re.match(r'<\((' + VALID_VARIABLE + r')\)', part)
  if not m:
    return part
  name = m.group(1)
  if name not in variables:
    raise IsolateError(
        'Variable "%s" was not found in %s.\nDid you forget to specify '
        '--path-variable?' % (name, variables))
  return str(variables[name])
def eval_variables(item, variables):
  """Replaces the .isolate variables in a string item.

  Every '<(NAME)' reference found in |item| is substituted through
  replace_variable(); the rest of the string is kept as is.

  Note that the .isolate format is a subset of the .gyp dialect.
  """
  # The capturing group makes re.split() keep the '<(NAME)' tokens in the
  # result. The closing half is a raw string so '\)' is not an invalid string
  # escape sequence.
  parts = re.split(r'(<\(' + VALID_VARIABLE + r'\))', item)
  return ''.join(replace_variable(p, variables) for p in parts)
def pretty_print(variables, stdout):
  """Outputs a .isolate file from the decoded variables.

  The .isolate format is GYP compatible.

  Similar to pprint.print() but with NIH syndrome.

  Arguments:
    variables: dict to serialize; values may be dicts, lists, strings, ints,
               bools or None.
    stdout: file-like object; only its write() method is used.
  """
  # Order the dictionary keys by these keys in priority.
  ORDER = ('variables', 'condition', 'command', 'files', 'read_only')

  def sorting_key(x):
    """Gives priority to 'most important' keys before the others."""
    if x in ORDER:
      return str(ORDER.index(x))
    return x

  def quote(text):
    # Single-quoted literal with backslashes and single quotes escaped, so the
    # output can be parsed back.
    return '\'%s\'' % text.replace('\\', '\\\\').replace('\'', '\\\'')

  def loop_list(indent, items):
    for item in items:
      if isinstance(item, basestring):
        # Escaped like every other string; it used to be written verbatim,
        # which corrupted the file for items holding a quote or a backslash.
        stdout.write('%s%s,\n' % (indent, quote(item)))
      elif isinstance(item, dict):
        stdout.write('%s{\n' % indent)
        loop_dict(indent + ' ', item)
        stdout.write('%s},\n' % indent)
      elif isinstance(item, list):
        # A list inside a list will write the first item embedded.
        stdout.write('%s[' % indent)
        for index, i in enumerate(item):
          if isinstance(i, basestring):
            stdout.write('%s, ' % quote(i))
          elif isinstance(i, dict):
            stdout.write('{\n')
            loop_dict(indent + ' ', i)
            if index != len(item) - 1:
              x = ', '
            else:
              x = ''
            stdout.write('%s}%s' % (indent, x))
          else:
            assert False
        stdout.write('],\n')
      else:
        assert False

  def loop_dict(indent, items):
    for key in sorted(items, key=sorting_key):
      item = items[key]
      stdout.write("%s'%s': " % (indent, key))
      if isinstance(item, dict):
        stdout.write('{\n')
        loop_dict(indent + ' ', item)
        stdout.write(indent + '},\n')
      elif isinstance(item, list):
        stdout.write('[\n')
        loop_list(indent + ' ', item)
        stdout.write(indent + '],\n')
      elif isinstance(item, basestring):
        stdout.write('%s,\n' % quote(item))
      elif isinstance(item, (int, bool)) or item is None:
        stdout.write('%s,\n' % item)
      else:
        assert False, item

  stdout.write('{\n')
  loop_dict(' ', variables)
  stdout.write('}\n')
def print_all(comment, data, stream):
  """Writes a whole .isolate file into |stream|.

  The top-level file comment, when there is one, is written first and is
  followed by |data| serialized through pretty_print().
  """
  # A falsy comment (None or '') means there is no header to emit.
  if comment:
    stream.write(comment)
  pretty_print(data, stream)
def extract_comment(content):
  """Extracts the file level comment.

  Returns the leading run of lines starting with '#', line endings included,
  or an empty string when the first line is not a comment.
  """
  leading = itertools.takewhile(
      lambda line: line.startswith('#'), content.splitlines(True))
  return ''.join(leading)
def eval_content(content):
  """Evaluates a python expression and returns its value.

  Used in practice for .isolate files.

  A TypeError raised during the evaluation is re-raised with |content|
  appended to its args.
  """
  # NOTE(review): eval() with '__builtins__' disabled is not a security
  # boundary; only feed it trusted .isolate content.
  sandbox_globals = {'__builtins__': None}
  sandbox_locals = {}
  try:
    result = eval(content, sandbox_globals, sandbox_locals)
  except TypeError as exc:
    # Attach the offending content to help debugging.
    exc.args = list(exc.args) + [content]
    raise
  # The evaluation must not have leaked anything in either namespace.
  assert sandbox_locals == {}, sandbox_locals
  assert sandbox_globals == {'__builtins__': None}, sandbox_globals
  return result
def match_configs(expr, config_variables, all_configs):
  """Returns the list of configs from |all_configs| that match |expr|.

  Arguments:
    expr: string that is evaluatable with eval(). It is a GYP condition.
    config_variables: list of the name of the variables.
    all_configs: list of the list of possible values.

  Returns:
    A list of tuples with one item per entry of |config_variables|. If a
    variable is not referenced at all, it is marked as unbounded (free) with a
    value set to None.

  Raises:
    IsolateError if |expr| does not evaluate to a bool.
  """
  # It is more than just eval'ing the variable, it needs to be double checked to
  # see if the variable is referenced at all. If not, the variable is free
  # (unbounded).
  # TODO(maruel): Use the intelligent way by inspecting expr instead of doing
  # trial and error to figure out which variable is bound.
  out = []
  for bound_variables in itertools.product(
      (True, False), repeat=len(config_variables)):
    # Names bound in this combination, and every config with the values of the
    # unbound variables replaced by None.
    variables = [c for c, b in zip(config_variables, bound_variables) if b]
    configs = set(
        tuple(v if b else None for v, b in zip(line, bound_variables))
        for line in all_configs)
    # Strip variables and see if expr can still be evaluated.
    for values in configs:
      # An empty builtins dict (rather than None) makes a reference to an
      # unbound variable raise NameError on both Python 2 and Python 3; with
      # None, Python 3 raises TypeError and the detection below breaks.
      globs = {'__builtins__': {}}
      globs.update(zip(variables, (v for v in values if v is not None)))
      try:
        assertion = eval(expr, globs, {})
      except NameError:
        # expr needs a variable left unbound in this combination.
        continue
      if not isinstance(assertion, bool):
        raise IsolateError('Invalid condition')
      if assertion:
        out.append(values)
  return out
def verify_variables(variables):
  """Asserts that the |variables| dictionary is in the expected format.

  Only 'command', 'files' and 'read_only' are accepted as keys. 'read_only'
  must be one of 0, 1, 2 or None; the others must be lists of strings.
  """
  allowed = frozenset(('command', 'files', 'read_only'))
  assert isinstance(variables, dict), variables
  assert allowed.issuperset(variables), variables.keys()
  for name, value in variables.items():
    if name != 'read_only':
      assert isinstance(value, list), value
      assert all(isinstance(i, basestring) for i in value), value
    else:
      assert value in (0, 1, 2, None), value
def verify_ast(expr, variables_and_values):
  """Verifies that |expr| is of the form
  expr ::= expr ( "or" | "and" ) expr
         | identifier "==" ( string | int )

  Also collects the variable identifiers and string/int values in the dict
  |variables_and_values|, in the form {'var': set([val1, val2, ...]), ...}.

  Arguments:
    expr: AST node of the condition, e.g. the body of an expression compiled
          with ast.PyCF_ONLY_AST.
    variables_and_values: dict updated in place.

  Raises:
    AssertionError if |expr| does not follow the grammar above.
  """
  assert isinstance(expr, (ast.BoolOp, ast.Compare)), expr
  if isinstance(expr, ast.BoolOp):
    assert isinstance(expr.op, (ast.And, ast.Or)), expr.op
    for subexpr in expr.values:
      verify_ast(subexpr, variables_and_values)
    return

  # The left-hand side must be a bare identifier. Checking it explicitly turns
  # conditions like '"a"==OS' into an assertion instead of an AttributeError.
  assert isinstance(expr.left, ast.Name), expr.left
  assert isinstance(expr.left.ctx, ast.Load)
  assert len(expr.ops) == 1
  assert isinstance(expr.ops[0], ast.Eq)
  var_values = variables_and_values.setdefault(expr.left.id, set())
  rhs = expr.comparators[0]
  if sys.version_info >= (3, 8):
    # Python 3.8+ parses every literal as ast.Constant; ast.Str and ast.Num
    # are deprecated there. Accept the same values: strings and numbers, but
    # not True/False/None.
    assert isinstance(rhs, ast.Constant), rhs
    value = rhs.value
    assert isinstance(value, (str, int, float, complex)), value
    assert not isinstance(value, bool), value
  else:
    assert isinstance(rhs, (ast.Str, ast.Num)), rhs
    value = rhs.n if isinstance(rhs, ast.Num) else rhs.s
  var_values.add(value)
def verify_condition(condition, variables_and_values):
  """Verifies the |condition| list is in the expected format.

  A condition is a two-item list: the expression string and a dict holding a
  single 'variables' entry.

  See verify_ast() for the meaning of |variables_and_values|.
  """
  assert isinstance(condition, list), condition
  assert len(condition) == 2, condition
  expr, then = condition

  # Parse the expression only; it is never executed here.
  parsed = compile(expr, '<condition>', 'eval', ast.PyCF_ONLY_AST)
  verify_ast(parsed.body, variables_and_values)

  assert isinstance(then, dict), then
  allowed_keys = frozenset(('variables',))
  assert allowed_keys.issuperset(then), then.keys()
  if 'variables' not in then:
    raise IsolateError('Missing \'variables\' in condition %s' % condition)
  verify_variables(then['variables'])
def verify_root(value, variables_and_values):
  """Verifies that |value| is the parsed form of a valid .isolate file.

  Only the 'includes', 'conditions' and 'variables' keys are accepted at the
  root.

  See verify_ast() for the meaning of |variables_and_values|.
  """
  assert isinstance(value, dict), value
  allowed_roots = frozenset(('includes', 'conditions', 'variables'))
  assert allowed_roots.issuperset(value), value.keys()

  includes = value.get('includes', [])
  assert isinstance(includes, list), includes
  for include in includes:
    assert isinstance(include, basestring), include

  conditions = value.get('conditions', [])
  assert isinstance(conditions, list), conditions
  for condition in conditions:
    verify_condition(condition, variables_and_values)

  verify_variables(value.get('variables', {}))
def get_folders(values_dict):
  """Returns a dict of all the folders in the given values_dict.

  An entry is a folder when its key ends with '/'.
  """
  return {
      item: values_dict[item] for item in values_dict if item.endswith('/')
  }
class ConfigSettings(object):
  """Represents the dependency variables for a single build configuration.

  .command and .isolate_dir describe how to run the command. .isolate_dir uses
  the OS' native path separator. It must be an absolute path, it's the path
  where to start the command from.

  .files is the list of dependencies. The items use '/' as a path separator.

  .read_only describe how to map the files.

  An instance whose .isolate_dir is None is the empty object.
  """

  def __init__(self, values, isolate_dir):
    verify_variables(values)
    if isolate_dir is not None:
      # A real configuration must be anchored on an absolute path.
      assert os.path.isabs(isolate_dir), isolate_dir
    else:
      # It must be an empty object if isolate_dir is None.
      assert values == {}, values
    self.files = sorted(values.get('files', []))
    self.command = values.get('command', [])[:]
    self.isolate_dir = isolate_dir
    self.read_only = values.get('read_only')

  def union(self, rhs):
    """Merges two config settings together into a new instance.

    A new instance is not created and self or rhs is returned if the other
    object is the empty object.

    self has priority over rhs for .command. Use the same .isolate_dir as the
    one having a .command; when neither has one, the side defining files is
    used, self first.

    Dependencies of the other side are path adjusted ONLY if they don't start
    with a path variable, e.g. the characters '<('.
    """
    # When an object has .isolate_dir == None, it means it is the empty object.
    if rhs.isolate_dir is None:
      return self
    if self.isolate_dir is None:
      return rhs

    if sys.platform == 'win32':
      assert self.isolate_dir[0].lower() == rhs.isolate_dir[0].lower()

    # Decide which side anchors the result.
    if self.command or rhs.command:
      # At least one side has a command; rhs wins only when self has none.
      use_rhs = not self.command
    else:
      # If self doesn't define any file, use rhs.
      use_rhs = not self.files

    # Note that while isolate_dir is in native path case, all other references
    # are in posix.
    if use_rhs:
      base_dir, other_dir = rhs.isolate_dir, self.isolate_dir
      kept_files, moved_files = rhs.files, self.files
    else:
      base_dir, other_dir = self.isolate_dir, rhs.isolate_dir
      kept_files, moved_files = self.files, rhs.files
    rebase_path = os.path.relpath(other_dir, base_dir).replace(
        os.path.sep, '/')

    def rebase_item(f):
      """Expresses |f| relative to base_dir, unless it starts with '<('."""
      if f.startswith('<(') or rebase_path == '.':
        return f
      return posixpath.join(rebase_path, f)

    if self.read_only is None:
      read_only = rhs.read_only
    else:
      read_only = self.read_only
    var = {
      'command': self.command or rhs.command,
      'files': sorted(kept_files + [rebase_item(f) for f in moved_files]),
      'read_only': read_only,
    }
    return ConfigSettings(var, base_dir)

  def flatten(self):
    """Converts the object into a dict, skipping the unset members."""
    out = {}
    if self.command:
      out['command'] = self.command
    if self.files:
      out['files'] = self.files
    if self.read_only is not None:
      out['read_only'] = self.read_only
    # TODO(maruel): Probably better to not output it if command is None?
    if self.isolate_dir is not None:
      out['isolate_dir'] = self.isolate_dir
    return out

  def __str__(self):
    """Returns a short representation useful for debugging."""
    file_lines = ''.join('\n ' + f for f in self.files)
    members = (
        self.command, self.isolate_dir, self.read_only, file_lines or '[]')
    return 'ConfigSettings(%s, %s, %s, %s)' % members
def _safe_index(l, k):
try:
return l.index(k)
except ValueError:
return None
def _get_map_keys(dest_keys, in_keys):
"""Returns a tuple of the indexes of each item in in_keys found in dest_keys.
For example, if in_keys is ('A', 'C') and dest_keys is ('A', 'B', 'C'), the
return value will be (0, None, 1).
"""
return tuple(_safe_index(in_keys, k) for k in dest_keys)
def _map_keys(mapping, items):
"""Returns a tuple with items placed at mapping index.
For example, if mapping is (1, None, 0) and items is ('a', 'b'), it will
return ('b', None, 'c').
"""
return tuple(items[i] if i != None else None for i in mapping)
class Configs(object):
  """Represents a processed .isolate file.

  Stores the file in a processed way, split by configuration.

  At this point, we don't know all the possibilities. So mount a partial view
  that we have.

  This class doesn't hold isolate_dir, since it is dependent on the final
  configuration selected. It is implicitly dependent on which .isolate defines
  the 'command' that will take effect.
  """

  def __init__(self, file_comment, config_variables):
    self.file_comment = file_comment
    # Contains the names of the config variables seen while processing
    # .isolate file(s). The order is important since the same order is used for
    # keys in self._by_config.
    assert isinstance(config_variables, tuple)
    assert all(isinstance(c, basestring) for c in config_variables), (
        config_variables)
    assert tuple(sorted(config_variables)) == config_variables, config_variables
    self._config_variables = tuple(config_variables)
    # The keys of _by_config are tuples of values for each of the items in
    # self._config_variables. A None item in the list of the key means the value
    # is unbounded.
    self._by_config = {}

  @property
  def config_variables(self):
    return self._config_variables

  def get_config(self, config):
    """Returns all configs that matches this config as a single ConfigSettings.

    Returns an empty ConfigSettings if none apply.
    """
    # TODO(maruel): Fix ordering based on the bounded values. The keys are not
    # necessarily sorted in the way that makes sense, they are alphabetically
    # sorted. It is important because the left-most takes precedence.
    merged = ConfigSettings({}, None)
    for key, settings in sorted(self._by_config.items()):
      # A stored key applies when each of its items is either unbounded or
      # equal to the requested value.
      applies = all(
          wanted == stored or stored is None
          for wanted, stored in zip(config, key))
      if applies:
        merged = merged.union(settings)
    return merged

  def set_config(self, key, value):
    """Sets the ConfigSettings for this key.

    The key is a tuple of bounded or unbounded variables. The global variable
    is the key where all values are unbounded, e.g.:
      (None,) * len(self._config_variables)
    """
    assert key not in self._by_config, (key, self._by_config.keys())
    assert isinstance(key, tuple)
    assert len(key) == len(self._config_variables), (
        key, self._config_variables)
    assert isinstance(value, ConfigSettings)
    self._by_config[key] = value

  def union(self, rhs):
    """Returns a new Configs instance, the union of variables from self and rhs.

    Uses self.file_comment if available, otherwise rhs.file_comment.
    It keeps config_variables sorted in the output.
    """
    # Merge the keys of config_variables for each Configs instances. All the new
    # variables will become unbounded. This requires realigning the keys.
    merged_variables = tuple(sorted(
        set(self.config_variables) | set(rhs.config_variables)))
    out = Configs(self.file_comment or rhs.file_comment, merged_variables)
    mapping_lhs = _get_map_keys(out.config_variables, self.config_variables)
    mapping_rhs = _get_map_keys(out.config_variables, rhs.config_variables)
    lhs_config = {
        _map_keys(mapping_lhs, k): v for k, v in self._by_config.items()
    }
    # pylint: disable=W0212
    rhs_config = {
        _map_keys(mapping_rhs, k): v for k, v in rhs._by_config.items()
    }

    for key in set(lhs_config) | set(rhs_config):
      l = lhs_config.get(key)
      r = rhs_config.get(key)
      if l and r:
        settings = l.union(r)
      else:
        settings = l or r
      out.set_config(key, settings)
    return out

  def flatten(self):
    """Returns a flat dictionary representation of the configuration."""
    return {k: v.flatten() for k, v in self._by_config.items()}

  def __str__(self):
    keys = ''.join('\n %s' % str(f) for f in self._by_config)
    return 'Configs(%s,%s)' % (self._config_variables, keys)
def load_included_isolate(isolate_dir, isolate_path):
  """Loads the .isolate file |isolate_path| included from |isolate_dir|.

  Arguments:
    isolate_dir: directory of the including .isolate file.
    isolate_path: path of the included file, relative to |isolate_dir|.

  Returns:
    The result of load_isolate_as_config() for the included file.

  Raises:
    IsolateError if |isolate_path| is absolute or, on Windows, if the included
    file is on another drive than |isolate_dir|.
  """
  if os.path.isabs(isolate_path):
    raise IsolateError(
        'Failed to load configuration; absolute include path \'%s\'' %
        isolate_path)
  included_isolate = os.path.normpath(os.path.join(isolate_dir, isolate_path))
  on_other_drive = (
      sys.platform == 'win32' and
      included_isolate[0].lower() != isolate_dir[0].lower())
  if on_other_drive:
    raise IsolateError(
        'Can\'t reference a .isolate file from another drive')
  included_dir = os.path.dirname(included_isolate)
  with fs.open(included_isolate, 'r') as f:
    return load_isolate_as_config(included_dir, eval_content(f.read()), None)
def load_isolate_as_config(isolate_dir, value, file_comment):
  """Parses one .isolate file and returns a Configs() instance.

  Arguments:
    isolate_dir: only used to load relative includes so it doesn't depend on
                 cwd.
    value: is the loaded dictionary that was defined in the gyp file.
    file_comment: comments found at the top of the file so it can be preserved.

  The expected format is strict, anything diverting from the format below will
  throw an assert:
  {
    'includes': [
      'foo.isolate',
    ],
    'conditions': [
      ['OS=="vms" and foo==42', {
        'variables': {
          'command': [
            ...
          ],
          'files': [
            ...
          ],
          'read_only': 0,
        },
      }],
      ...
    ],
    'variables': {
      ...
    },
  }
  """
  assert os.path.isabs(isolate_dir), isolate_dir
  conditions = value.get('conditions', [])
  for cond in conditions:
    if len(cond) == 3:
      raise IsolateError('Using \'else\' is not supported anymore.')

  variables_and_values = {}
  verify_root(value, variables_and_values)
  config_variables = ()
  all_configs = []
  if variables_and_values:
    # Split the sorted (name, values) pairs into two parallel tuples.
    config_variables, config_values = zip(
        *sorted(variables_and_values.items()))
    all_configs = list(itertools.product(*config_values))

  isolate = Configs(file_comment, config_variables)
  # Add global variables. The global variables are on the key where every
  # variable is unbounded.
  global_key = (None,) * len(config_variables)
  isolate.set_config(
      global_key, ConfigSettings(value.get('variables', {}), isolate_dir))

  # Add configuration-specific variables.
  for expr, then in conditions:
    matching = match_configs(expr, config_variables, all_configs)
    conditional = Configs(None, config_variables)
    for config in matching:
      conditional.set_config(
          config, ConfigSettings(then['variables'], isolate_dir))
    isolate = isolate.union(conditional)

  # If the .isolate contains command, ignore any command in child .isolate.
  # pylint: disable=W0212
  root_has_command = any(c.command for c in isolate._by_config.values())

  # Load the includes. Process them in reverse so the last one take precedence.
  for include in reversed(value.get('includes', [])):
    included = load_included_isolate(isolate_dir, include)
    if root_has_command:
      # Strip any command in the imported isolate. It is because the chosen
      # command is not related to the one in the top-most .isolate, since the
      # configuration is flattened.
      for settings in included._by_config.values():
        settings.command = []
    isolate = isolate.union(included)

  return isolate
def load_isolate_for_config(isolate_dir, content, config_variables):
  """Loads the .isolate file and returns the information unprocessed but
  filtered for the specific configuration.

  Arguments:
    isolate_dir: directory of the .isolate file.
    content: text of the .isolate file.
    config_variables: mapping giving the value of each configuration variable.

  Returns:
    tuple of command, dependencies, read_only flag, isolate_dir.
    The dependencies are fixed to use os.path.sep.

  Raises:
    IsolateError if a variable used by the .isolate file is missing from
    |config_variables|.
  """
  # Load the .isolate file, process its conditions, retrieve the command and
  # dependencies.
  isolate = load_isolate_as_config(isolate_dir, eval_content(content), None)
  try:
    config_name = tuple(
        [config_variables[var] for var in isolate.config_variables])
  except KeyError:
    missing = sorted(set(isolate.config_variables) - set(config_variables))
    raise IsolateError(
        'These configuration variables were missing from the command line: %s' %
        ', '.join(missing))

  # A configuration is to be created with all the combinations of free
  # variables.
  config = isolate.get_config(config_name)
  native_files = [f.replace('/', os.path.sep) for f in config.files]
  return config.command, native_files, config.read_only, config.isolate_dir