---
variables:
pjrt_variables:
PJRT_DEVICE:
description:
        - Indicates which device is being used with PJRT. It can be CPU,
          TPU, or CUDA.
type: string
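    # Usage sketch (illustrative, not part of the schema): PJRT_DEVICE is
    # read when the runtime initializes, so set it before importing
    # torch_xla. Assumes a standard torch_xla installation.
    #
    #   import os
    #   os.environ["PJRT_DEVICE"] = "TPU"  # or "CPU" / "CUDA"
    #   import torch_xla.core.xla_model as xm
    #   device = xm.xla_device()  # resolves to the configured PJRT device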
PJRT_SELECT_DEFAULT_DEVICE:
description:
        - Whether or not to select a default PJRT device based on the
          environment if the runtime is not already configured.
      type: string
      default_value: "1"
PJRT_CPU_ASYNC_CLIENT:
description:
- Whether or not to create an async PJRT client for the CPU device(s).
type: bool
default_value: false
PJRT_GPU_ASYNC_CLIENT:
description:
- Whether or not to create an async PJRT client for the GPU device(s).
type: bool
default_value: false
PJRT_TPU_MAX_INFLIGHT_COMPUTATIONS:
description:
- Max inflight computations that the PJRT client can handle for TPU.
type: int
default_value: 32
build_variables:
DEBUG:
description:
        - Whether or not to build pytorch/xla in debug mode.
type: bool
default_value: false
GRPC_VERBOSITY:
description:
        - Verbosity level for gRPC, e.g. INFO or ERROR.
type: string
default_value: "ERROR"
XLA_CUDA:
description:
- Build the xla client with CUDA enabled.
type: bool
default_value: false
GIT_VERSIONED_XLA_BUILD:
description:
        - Creates a versioned build. In particular, appends a git SHA to the
          version number string.
type: bool
default_value: false
COMPILE_PARALLEL:
description:
        - Enables parallel compilation for the PyTorch/XLA build.
type: bool
default_value: true
BUILD_CPP_TESTS:
description:
        - Whether or not to build the C++ tests.
type: bool
default_value: true
BUNDLE_LIBTPU:
description:
- Whether or not to include libtpu in the final wheel.
type: bool
default_value: false
ALLOW_MULTIPLE_LIBTPU_LOAD:
description:
- Allow for multiple processes to load libtpu at the same time.
type: bool
default_value: true
PT_XLA_DEBUG:
description:
- Used to automatically analyze the metrics report and provide a
summary.
type: bool
default_value: false
TPUVM_MODE:
description:
        - Include some additional TPUVM features and code when building the
          third-party TensorFlow.
type: bool
default_value: false
TORCH_XLA_VERSION:
description:
        - Specifies the version of PyTorch/XLA, rather than the hard-coded
          version in setup.py; used when we're building binaries for
          distribution. Should be parseable as a version number, e.g. 1.14.
type: string
default_value: "1.14"
TORCH_XLA_PACKAGE_NAME:
description:
- Allows the developer to change the package name to something other
than torch_xla.
type: string
default_value: "torch_xla"
TPU_ML_PLATFORM:
description:
        - Name of the ML platform being used on TPU, e.g. PyTorch/XLA,
          TensorFlow, or JAX.
type: string
default_value: "PyTorch/XLA"
XLA_BAZEL_VERBOSE:
description:
- Turn on verbose messages during the bazel build of the xla/xrt client.
type: bool
default_value: false
feature_variables:
XLA_SYNC_WAIT:
description:
        - Forces the XLA tensor sync operation to wait for its completion
          before moving to the next step.
type: bool
default_value: false
XLA_NO_SPECIAL_SCALARS:
description:
- When set to false, this will route some tensor values to constant
scalars.
type: bool
default_value: false
XLA_TENSOR_UPDATE_SYNC:
description:
        - Used to decide whether or not to sync the update in
          XLANativeFunctions::_copy_from.
type: bool
default_value: true
XLA_USE_BF16:
description:
        - If set to true, transforms all the PyTorch Float values into
          BFloat16 when sending to the TPU device. Tensor arithmetic will be
          done in reduced precision, so tensors will not be accurate if
          accumulated over time.
type: bool
default_value: false
XLA_USE_F16:
description:
        - If set to true, transforms all the PyTorch Float values into
          Float16 (PyTorch Half type) when sending to devices which support
          them.
type: bool
default_value: false
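    # Usage sketch (illustrative): XLA_USE_BF16 / XLA_USE_F16 are read at
    # startup, so set them before torch_xla is imported.
    #
    #   import os
    #   os.environ["XLA_USE_BF16"] = "1"
    #   import torch
    #   import torch_xla.core.xla_model as xm
    #   t = torch.randn(4, 4).to(xm.xla_device())  # held as bf16 on device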
XLA_USE_32BIT_LONG:
description:
        - If set to true, maps PyTorch Long types to XLA 32bit type. On the
          versions of the TPU HW at the time of writing, 64bit integer
          computations are expensive, so setting this flag might help. Users
          should verify that truncating to 32bit values is a valid operation
          for their use of PyTorch Long values.
type: bool
default_value: false
XLA_IO_THREAD_POOL_SIZE:
description:
- Number of threads for the IO thread pool in the XLA client. Defaults
to std::thread::hardware_concurrency().
type: int
XLA_TENSOR_ALLOCATOR_MAXSIZE:
description:
- Max cache size to be used by TensorAllocator in XRT. We only cache
blocks smaller than this number, measured in bytes.
type: int
default_value: 1000000000
XLA_IR_SHAPE_CACHE_SIZE:
description:
- Size for the shape cache used by XLA.
type: int
default_value: 12288
XLA_DEVDATA_CACHE_SIZE:
description:
- Max cache size for XLA Data cache.
type: int
default_value: 128
XLA_TRIM_GRAPH_CHECK_FREQUENCY:
description:
- Frequency to check and, if applicable, trim the IR graph.
type: int
default_value: 5000
XLA_DENSE_GATHER_FACTOR:
description:
- Used as a threshold for when we should use dense gather. We multiply
the factor by 10, and compare it to the number of input elements. If
there are more input elements, we'll use sparse gather.
type: int
default_value: 8192
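    # Worked example for XLA_DENSE_GATHER_FACTOR: with the default factor of
    # 8192 the threshold is 8192 * 10 = 81920 input elements; gathers over
    # inputs larger than that take the sparse path, smaller ones the dense
    # path.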
XLA_DENSE_SCATTER_FACTOR:
description:
        - Used as a threshold to determine when to use dense scatter. If the
          dense scatter factor times the number of index elements is greater
          than or equal to the number of input elements, we use dense
          scatter.
type: int
default_value: 100
XLA_RESIZE_SPLIT_FACTOR:
description:
- Used as a threshold to determine when the resize is too large to be
done all at once, in which case we do one dimension at a time.
type: float
default_value: 3.0
XLA_MAX_PADDING_FACTOR:
description:
- Used as a threshold to determine whether to use a sorted or
descending layout for shape.
type: float
default_value: 1.25
XLA_LAYOUTS:
description:
- A list of key/value pairs of the format "k1=v1;k2=v2". Keys are
Shapes and values are layouts.
type: string
default_value: ""
XLA_RNG_BIT_GENERATOR:
description:
        - String name of the bit generator type, which can be default,
          philox, or three_fry. There is no default value; when unset,
          special behavior applies.
type: string
XLA_EXPERIMENTAL:
description:
        - Used to enable experimental features, represented as a list
          separated by ":".
type: string
default_value: ""
XLA_FLAGS:
description:
- List of flags used by XLA, separated by " ". This is only referenced
in PyTorch/XLA to add flags to the list.
type: string
default_value: ""
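    # Usage sketch (illustrative): appending a flag for the underlying XLA
    # compiler. `--xla_dump_to` is an XLA compiler flag and is shown here as
    # an assumption; check the flags supported by your XLA version.
    #
    #   import os
    #   os.environ["XLA_FLAGS"] = "--xla_dump_to=/tmp/xla_dump"
    #   import torch_xla  # flags are picked up when the runtime initializes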
DISABLE_NUMERIC_CC_TOKEN:
description:
        - Whether or not to skip modifying the existing token based on the
          XlaOp when creating a new token. When set to true, the same token
          is used for every XlaOp.
type: bool
default_value: false
XLA_USE_SPMD:
description:
- Deprecated. Whether or not to use the SPMD virtual device optimization.
Use `torch_xla.runtime.use_spmd()` instead.
type: bool
default_value: false
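    # Migration sketch: the description above names the preferred
    # replacement API.
    #
    #   import torch_xla.runtime as xr
    #   xr.use_spmd()  # instead of setting XLA_USE_SPMD=1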
SPLIT_EXECUTOR_CACHE_SIZE:
description:
        - Compiler cache size for the op-by-op executor.
type: int
default_value: 2048
device_variables:
TPU_NUM_DEVICES:
description:
- Number of TPU devices being used by this instance of XRT.
type: int
default_value: 8
CPU_NUM_DEVICES:
description:
- Number of CPU devices being used by this instance of XRT.
type: int
GPU_NUM_DEVICES:
description:
- Number of GPU devices being used by this instance of XRT.
type: int
debug_variables:
XLA_FNTRACKER_FILE:
description:
- If set, the path to a file where output from the function tracker
should be written.
type: string
default_value: ""
XLA_FNTRACKER_LIST:
description:
- Tags for the tracker context, which tell the function tracker which
functions to track.
type: string
default_value: ""
XLA_METRICS_SAMPLES:
description:
- Max samples to use for any metric.
type: int
default_value: 1024
XLA_COMPILE_TIME_THRESHOLD:
description:
- Threshold that determines when we log a slow compilation to the hlo
folder. Defaults to std::numeric_limits<double>::max().
type: int
XLA_FNTRACKER_LEVEL:
description:
- Level for the tracker context. When tracking functions, only
functions with a level less than or equal to this level will get
tracked. Defaults to std::numeric_limits<int>::max().
type: int
XLA_SAVE_TENSORS_FILE:
description:
        - The path to a file which will be used to dump the IR graphs during
          execution. Note that the file can become really big if the option
          is left enabled and the PyTorch program is left running for a long
          time. The graphs are appended to the file, so to have a clean
          sheet from run to run, the file should be explicitly removed.
type: string
default_value: ""
XLA_SAVE_TENSORS_FMT:
description:
- The format of the graphs stored within the XLA_SAVE_TENSORS_FILE
file. Can be text (the default), dot (the Graphviz format) or hlo.
type: string
default_value: "text"
XLA_METRICS_FILE:
description:
        - If set, the path to a local file where the internal metrics will
          be saved at every step. Metrics will be appended to the file if it
          already exists. Internally defaults to "None".
type: string
XLA_SAVE_HLO_FILE:
description:
- If set, the path to a local file where, in case of
compilation/execution error, the offending HLO graph will be saved.
type: string
default_value: ""
XLA_SLOW_COMPILE_HLO_FOLDER:
description:
        - Folder name to save HLO files to when the compile time is above
          the XLA_COMPILE_TIME_THRESHOLD.
type: string
default_value: ""
XLA_TEST_DUMP_GRAPHS:
description:
- Type of graph to print to std::cerr. It can be empty, text, hlo, or
dot.
type: string
default_value: ""
PT_XLA_DEBUG_FILE:
description:
- If set, filepath used for printing out reports.
type: string
default_value: ""
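    # Usage sketch (illustrative): enabling the debug analysis and writing
    # the report to a file; the path is a placeholder.
    #
    #   import os
    #   os.environ["PT_XLA_DEBUG"] = "1"
    #   os.environ["PT_XLA_DEBUG_FILE"] = "/tmp/pt_xla_debug.txt"
    #   import torch_xla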
TF_CPP_VMODULE:
description:
        - Environment variable used for TF VLOGs; it takes the form
          TF_CPP_VMODULE=name=value,.... Note that for VLOGs you must also
          set TF_CPP_MIN_LOG_LEVEL=0. For PyTorch/XLA, a configuration like
          TF_CPP_VMODULE=tensor=5 would enable logging for the tensor
          module.
type: string
TF_CPP_MIN_LOG_LEVEL:
description:
        - Minimum level at which messages are printed.
          TF_CPP_MIN_LOG_LEVEL=0 turns on INFO logging,
          TF_CPP_MIN_LOG_LEVEL=1 WARNING, and so on. Our PyTorch/XLA TF_VLOG
          uses tensorflow::INFO level by default, so to see VLOGs set
          TF_CPP_MIN_LOG_LEVEL=0.
type: int
default_value: 1
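    # Usage sketch (illustrative): combining the two variables above to see
    # VLOGs from the tensor module, as in the TF_CPP_VMODULE description.
    #
    #   import os
    #   os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"  # enable INFO/VLOG output
    #   os.environ["TF_CPP_VMODULE"] = "tensor=5"
    #   import torch_xla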
TF_CPP_LOG_THREAD_ID:
description:
        - If set to true, the TF logs will show the thread ID, which helps
          with debugging multithreaded processes.
type: bool
default_value: false
TORCH_TEST_DEVICES:
description:
- Provided by the upstream and used to test new device types.
type: string
XLA_IR_DEBUG:
description:
        - Enables capturing the Python stack trace when creating IR nodes,
          making it possible to understand which PyTorch operation was
          responsible for generating the IR.
type: bool
default_value: false
XLA_HLO_DEBUG:
description:
        - Propagates the Python stack frames captured when XLA_IR_DEBUG is
          active into the XLA HLO metadata.
type: bool
default_value: false
XLA_TEST_DUMP_METRICS:
description:
        - Controls whether or not metrics are dumped during C++ test
          teardown.
type: bool
default_value: false
XLA_TEST_DUMP_TENSORS:
description:
        - Whether or not to print tensors to std::cerr in C++ tests.
type: bool
default_value: false
XLA_DUMP_HLO_GRAPH:
description:
        - If set to true, the offending HLO graph will be dumped as part of
          the runtime error raised by xla_util.cc in case of a compilation
          or execution error.
type: bool
default_value: false
XLA_DUMP_FATAL_STACK:
description:
- Installs the stack trace handler upon XLA client creation.
type: bool
default_value: false
XLA_METRICS_PERCENTILES:
description:
- List of metrics percentiles to record.
type: string
default_value: "0.01:0.05:0.1:0.2:0.5:0.8:0.9:0.95:0.99"
XLA_RELEASE_GIL_DURING_TRANSFER:
description:
- Release Python's GIL when transferring data from the runtime.
type: bool
default_value: true
XLA_STABLEHLO_COMPILE:
description:
        - Pass StableHLO to the XLA PjRt client for compilation. This
          compilation flag is experimental. The default_value will be set to
          true once the StableHLO workflow is mature.
type: bool
default_value: false
XLA_DUMP_POST_OPTIMIZATIONS:
description:
- Dump the HLO graph after optimizations. You need to use it together
with XLA_SAVE_TENSORS_FMT='hlo' and XLA_SAVE_TENSORS_FILE='your/location'.
type: bool
default_value: false
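    # Usage sketch (illustrative): dumping the post-optimization HLO by
    # combining the three variables named in the description above; the file
    # path is a placeholder.
    #
    #   import os
    #   os.environ["XLA_DUMP_POST_OPTIMIZATIONS"] = "1"
    #   os.environ["XLA_SAVE_TENSORS_FMT"] = "hlo"
    #   os.environ["XLA_SAVE_TENSORS_FILE"] = "/tmp/post_opt.hlo"
    #   import torch_xla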