Commit
Merge branch 'ershi/fix-inplace-ops' into 'main'
Fix code generation of in-place multiply and divide

See merge request omniverse/warp!842
mmacklin committed Nov 7, 2024
2 parents 29421d1 + a02f499 commit b08f296
Showing 4 changed files with 119 additions and 49 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -22,6 +22,7 @@
- Fix an incorrect user function being sometimes resolved when multiple overloads are available with array parameters with different `dtype` values.
- Fix error being raised when static and dynamic for-loops are written in sequence with the same iteration variable names ([GH-331](https://github.com/NVIDIA/warp/issues/331)).
- Fix an issue with the `Texture Write` node, used in the Mandelbrot Omniverse sample, sometimes erroring out in multi-GPU environments.
- Fix code generation of in-place multiplication and division operations (regression introduced in a69d061) ([GH-342](https://github.com/NVIDIA/warp/issues/342)).

## [1.4.1] - 2024-10-15

28 changes: 28 additions & 0 deletions docs/modules/differentiability.rst
@@ -893,6 +893,34 @@ Warp uses a source-code transformation approach to auto-differentiation.
In this approach, the backwards pass must keep a record of intermediate values computed during the forward pass.
This imposes some restrictions on what kernels can do if they are to remain differentiable.

In-Place Math Operations
^^^^^^^^^^^^^^^^^^^^^^^^

In-place addition and subtraction can be used in kernels participating in the backward pass, e.g.:

.. code-block:: python

    @wp.kernel
    def inplace(a: wp.array(dtype=float), b: wp.array(dtype=float)):
        i = wp.tid()
        a[i] -= b[i]


    a = wp.full(10, value=10.0, dtype=float, requires_grad=True)
    b = wp.full(10, value=2.0, dtype=float, requires_grad=True)

    with wp.Tape() as tape:
        wp.launch(inplace, a.shape, inputs=[a, b])

    tape.backward(grads={a: wp.ones_like(a)})

    print(a.grad)  # [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
    print(b.grad)  # [-1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]

In-place multiplication and division are *not* supported; kernels that use them will produce incorrect results in the backward pass.
A warning will be emitted during code generation if ``wp.config.verbose = True``.
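
As a workaround, the same update can be written as a plain assignment into a separate output array, which remains differentiable. A minimal sketch (the ``scale`` kernel and the output array ``c`` are illustrative, not part of Warp's API):

.. code-block:: python

    @wp.kernel
    def scale(a: wp.array(dtype=float), b: wp.array(dtype=float), c: wp.array(dtype=float)):
        i = wp.tid()
        # plain assignment instead of c[i] *= ...: the tape can record this
        c[i] = a[i] * b[i]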

Dynamic Loops
^^^^^^^^^^^^^
Currently, dynamic loops are not replayed or unrolled in the backward pass, meaning intermediate values that are
5 changes: 4 additions & 1 deletion warp/codegen.py
@@ -2576,7 +2576,10 @@ def make_new_assign_statement():
             if warp.config.verify_autograd_array_access:
                 target.mark_write(kernel_name=kernel_name, filename=filename, lineno=lineno)
         else:
-            print(f"Warning: in-place op {node.op} is not differentiable")
+            if warp.config.verbose:
+                print(f"Warning: in-place op {node.op} is not differentiable")
+            make_new_assign_statement()
+            return

# TODO
elif type_is_vector(target_type) or type_is_quaternion(target_type) or type_is_matrix(target_type):
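
For readers following the fix: with this change, a non-differentiable in-place multiply or divide no longer goes through the in-place code path; codegen falls back to an ordinary assignment. A minimal sketch of the now-correct forward behavior, mirroring the values used in the new test below (the kernel and variable names are illustrative):

```python
import warp as wp

@wp.kernel
def scale_inplace(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    i = wp.tid()
    x[i] *= y[i]  # codegen now treats this like x[i] = x[i] * y[i]

x = wp.full(3, value=10.0, dtype=float)
y = wp.full(3, value=5.0, dtype=float)
wp.launch(scale_inplace, dim=x.shape, inputs=[x, y])
print(x.numpy())  # expected: [50. 50. 50.]
```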
134 changes: 86 additions & 48 deletions warp/tests/test_array.py
@@ -2361,64 +2361,75 @@ def first_row_plus_one(x: wp.array2d(dtype=float)):
     assert_np_equal(arr_warp.numpy(), np.array([[2, 1, 1], [1, 0, 0], [1, 0, 0]]))


-def test_array_inplace_ops(test, device):
-    @wp.kernel
-    def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
-        i = wp.tid()
-        x[i] += y[i]
+@wp.kernel
+def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] += y[i]

-    @wp.kernel
-    def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
-        i, j = wp.tid()
-        x[i, j] += y[i, j]
-
-    @wp.kernel
-    def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
-        i, j, k = wp.tid()
-        x[i, j, k] += y[i, j, k]
+@wp.kernel
+def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
+    i, j = wp.tid()
+    x[i, j] += y[i, j]

-    @wp.kernel
-    def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
-        i, j, k, l = wp.tid()
-        x[i, j, k, l] += y[i, j, k, l]
-
-    @wp.kernel
-    def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
-        i = wp.tid()
-        x[i] -= y[i]
+@wp.kernel
+def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
+    i, j, k = wp.tid()
+    x[i, j, k] += y[i, j, k]

-    @wp.kernel
-    def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
-        i, j = wp.tid()
-        x[i, j] -= y[i, j]
-
-    @wp.kernel
-    def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
-        i, j, k = wp.tid()
-        x[i, j, k] -= y[i, j, k]
+@wp.kernel
+def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
+    i, j, k, l = wp.tid()
+    x[i, j, k, l] += y[i, j, k, l]

-    @wp.kernel
-    def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
-        i, j, k, l = wp.tid()
-        x[i, j, k, l] -= y[i, j, k, l]
-
-    @wp.kernel
-    def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
-        i = wp.tid()
-        x[i] += y[i]
+@wp.kernel
+def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] -= y[i]

-    @wp.kernel
-    def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
-        i = wp.tid()
-        x[i] += y[i]
-
-    @wp.kernel
-    def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
-        i = wp.tid()
-        a = y[i]
-        a += x[i]
-        wp.atomic_add(z, 0, a)
+@wp.kernel
+def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
+    i, j = wp.tid()
+    x[i, j] -= y[i, j]

+@wp.kernel
+def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
+    i, j, k = wp.tid()
+    x[i, j, k] -= y[i, j, k]

+@wp.kernel
+def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
+    i, j, k, l = wp.tid()
+    x[i, j, k, l] -= y[i, j, k, l]

+@wp.kernel
+def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
+    i = wp.tid()
+    x[i] += y[i]

+@wp.kernel
+def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
+    i = wp.tid()
+    x[i] += y[i]

+@wp.kernel
+def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
+    i = wp.tid()
+    a = y[i]
+    a += x[i]
+    wp.atomic_add(z, 0, a)

+def test_array_inplace_diff_ops(test, device):
     N = 3
     x1 = wp.ones(N, dtype=float, requires_grad=True, device=device)
     x2 = wp.ones((N, N), dtype=float, requires_grad=True, device=device)
@@ -2528,6 +2539,32 @@ def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
     assert_np_equal(y.grad.numpy(), np.ones(1, dtype=float))


@wp.kernel
def inplace_mul_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    i = wp.tid()
    x[i] *= y[i]


@wp.kernel
def inplace_div_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    i = wp.tid()
    x[i] /= y[i]


def test_array_inplace_non_diff_ops(test, device):
    N = 3
    x1 = wp.full(N, value=10.0, dtype=float, device=device)
    y1 = wp.full(N, value=5.0, dtype=float, device=device)

    wp.launch(inplace_mul_1d, N, inputs=[x1, y1], device=device)
    assert_np_equal(x1.numpy(), np.full(N, fill_value=50.0, dtype=float))

    x1.fill_(10.0)
    y1.fill_(5.0)
    wp.launch(inplace_div_1d, N, inputs=[x1, y1], device=device)
    assert_np_equal(x1.numpy(), np.full(N, fill_value=2.0, dtype=float))

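Relatedly, the warning added in warp/codegen.py above is only printed when verbose mode is enabled before the kernel's module is built. A small usage sketch (the kernel name is hypothetical):

```python
import warp as wp

wp.config.verbose = True  # enable before any kernel module is compiled

@wp.kernel
def scale_inplace(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    i = wp.tid()
    # when this module builds, codegen prints:
    # "Warning: in-place op ... is not differentiable"
    x[i] *= y[i]
```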

@wp.kernel
def inc_scalar(a: wp.array(dtype=float)):
    tid = wp.tid()
@@ -2750,7 +2787,8 @@ def test_array_new_del(self):
add_function_test(TestArray, "test_array_aliasing_from_numpy", test_array_aliasing_from_numpy, devices=["cpu"])
add_function_test(TestArray, "test_numpy_array_interface", test_numpy_array_interface, devices=["cpu"])

-add_function_test(TestArray, "test_array_inplace_ops", test_array_inplace_ops, devices=devices)
+add_function_test(TestArray, "test_array_inplace_diff_ops", test_array_inplace_diff_ops, devices=devices)
+add_function_test(TestArray, "test_array_inplace_non_diff_ops", test_array_inplace_non_diff_ops, devices=devices)
add_function_test(TestArray, "test_direct_from_numpy", test_direct_from_numpy, devices=["cpu"])
add_function_test(TestArray, "test_kernel_array_from_ptr", test_kernel_array_from_ptr, devices=devices)

