diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5695299d..7e9f9b7a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@
 - Fix an incorrect user function being sometimes resolved when multiple overloads are available with array parameters with different `dtype` values.
 - Fix error being raised when static and dynamic for-loops are written in sequence with the same iteration variable names ([GH-331](https://github.com/NVIDIA/warp/issues/331)).
 - Fix an issue with the `Texture Write` node, used in the Mandelbrot Omniverse sample, sometimes erroring out in multi-GPU environments.
+- Fix code generation of in-place multiplication and division operations (regression introduced in a69d061) ([GH-342](https://github.com/NVIDIA/warp/issues/342)).
 
 ## [1.4.1] - 2024-10-15
 
diff --git a/docs/modules/differentiability.rst b/docs/modules/differentiability.rst
index d3db5c53..a6a3a15a 100644
--- a/docs/modules/differentiability.rst
+++ b/docs/modules/differentiability.rst
@@ -893,6 +893,34 @@ Warp uses a source-code transformation approach to auto-differentiation.
 In this approach, the backwards pass must keep a record of intermediate values computed during the forward pass.
 This imposes some restrictions on what kernels can do if they are to remain differentiable.
 
+In-Place Math Operations
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+In-place addition and subtraction can be used in kernels participating in the backward pass, e.g.:
+
+.. code-block:: python
+
+    @wp.kernel
+    def inplace(a: wp.array(dtype=float), b: wp.array(dtype=float)):
+        i = wp.tid()
+
+        a[i] -= b[i]
+
+
+    a = wp.full(10, value=10.0, dtype=float, requires_grad=True)
+    b = wp.full(10, value=2.0, dtype=float, requires_grad=True)
+
+    with wp.Tape() as tape:
+        wp.launch(inplace, a.shape, inputs=[a, b])
+
+    tape.backward(grads={a: wp.ones_like(a)})
+
+    print(a.grad)  # [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
+    print(b.grad)  # [-1. -1. -1. -1. -1. -1. -1. -1. -1. -1.]
+
+In-place multiplication and division are *not* supported and will produce incorrect results in the backward pass.
+A warning is emitted during code generation if ``wp.config.verbose = True``.
+
 Dynamic Loops
 ^^^^^^^^^^^^^
 Currently, dynamic loops are not replayed or unrolled in the backward pass, meaning intermediate values that are
diff --git a/warp/codegen.py b/warp/codegen.py
index 51c98c72..5a800888 100644
--- a/warp/codegen.py
+++ b/warp/codegen.py
@@ -2576,7 +2576,10 @@ def make_new_assign_statement():
                 if warp.config.verify_autograd_array_access:
                     target.mark_write(kernel_name=kernel_name, filename=filename, lineno=lineno)
             else:
-                print(f"Warning: in-place op {node.op} is not differentiable")
+                if warp.config.verbose:
+                    print(f"Warning: in-place op {node.op} is not differentiable")
+                make_new_assign_statement()
+                return
 
         # TODO
         elif type_is_vector(target_type) or type_is_quaternion(target_type) or type_is_matrix(target_type):
diff --git a/warp/tests/test_array.py b/warp/tests/test_array.py
index 3ffddb71..66ad8f12 100644
--- a/warp/tests/test_array.py
+++ b/warp/tests/test_array.py
@@ -2361,64 +2361,75 @@ def first_row_plus_one(x: wp.array2d(dtype=float)):
     assert_np_equal(arr_warp.numpy(), np.array([[2, 1, 1], [1, 0, 0], [1, 0, 0]]))
 
 
-def test_array_inplace_ops(test, device):
-    @wp.kernel
-    def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
-        i = wp.tid()
-        x[i] += y[i]
+@wp.kernel
+def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] += y[i]
 
-    @wp.kernel
-    def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
-        i, j = wp.tid()
-        x[i, j] += y[i, j]
-
-    @wp.kernel
-    def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
-        i, j, k = wp.tid()
-        x[i, j, k] += y[i, j, k]
+@wp.kernel
+def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
+    i, j = wp.tid()
+    x[i, j] += y[i, j]
 
-    @wp.kernel
-    def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
-        i, j, k, l = wp.tid()
-        x[i, j, k, l] += y[i, j, k, l]
-
-    @wp.kernel
-    def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
-        i = wp.tid()
-        x[i] -= y[i]
+@wp.kernel
+def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
+    i, j, k = wp.tid()
+    x[i, j, k] += y[i, j, k]
 
-    @wp.kernel
-    def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
-        i, j = wp.tid()
-        x[i, j] -= y[i, j]
-
-    @wp.kernel
-    def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
-        i, j, k = wp.tid()
-        x[i, j, k] -= y[i, j, k]
+@wp.kernel
+def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
+    i, j, k, l = wp.tid()
+    x[i, j, k, l] += y[i, j, k, l]
 
-    @wp.kernel
-    def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
-        i, j, k, l = wp.tid()
-        x[i, j, k, l] -= y[i, j, k, l]
-
-    @wp.kernel
-    def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
-        i = wp.tid()
-        x[i] += y[i]
+@wp.kernel
+def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] -= y[i]
 
-    @wp.kernel
-    def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
-        i = wp.tid()
-        x[i] += y[i]
-
-    @wp.kernel
-    def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
-        i = wp.tid()
-        a = y[i]
-        a += x[i]
-        wp.atomic_add(z, 0, a)
+@wp.kernel
+def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
+    i, j = wp.tid()
+    x[i, j] -= y[i, j]
+
+
+@wp.kernel
+def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
+    i, j, k = wp.tid()
+    x[i, j, k] -= y[i, j, k]
+
+@wp.kernel
+def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
+    i, j, k, l = wp.tid()
+    x[i, j, k, l] -= y[i, j, k, l]
+
+
+@wp.kernel
+def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
+    i = wp.tid()
+    x[i] += y[i]
+
+
+@wp.kernel
+def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
+    i = wp.tid()
+    x[i] += y[i]
+
+
+@wp.kernel
+def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
+    i = wp.tid()
+    a = y[i]
+    a += x[i]
+    wp.atomic_add(z, 0, a)
+
+
+def test_array_inplace_diff_ops(test, device):
     N = 3
     x1 = wp.ones(N, dtype=float, requires_grad=True, device=device)
     x2 = wp.ones((N, N), dtype=float, requires_grad=True, device=device)
@@ -2528,6 +2539,32 @@ def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.ar
     assert_np_equal(y.grad.numpy(), np.ones(1, dtype=float))
 
 
+@wp.kernel
+def inplace_mul_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] *= y[i]
+
+
+@wp.kernel
+def inplace_div_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] /= y[i]
+
+
+def test_array_inplace_non_diff_ops(test, device):
+    N = 3
+    x1 = wp.full(N, value=10.0, dtype=float, device=device)
+    y1 = wp.full(N, value=5.0, dtype=float, device=device)
+
+    wp.launch(inplace_mul_1d, N, inputs=[x1, y1], device=device)
+    assert_np_equal(x1.numpy(), np.full(N, fill_value=50.0, dtype=float))
+
+    x1.fill_(10.0)
+    y1.fill_(5.0)
+    wp.launch(inplace_div_1d, N, inputs=[x1, y1], device=device)
+    assert_np_equal(x1.numpy(), np.full(N, fill_value=2.0, dtype=float))
+
+
 @wp.kernel
 def inc_scalar(a: wp.array(dtype=float)):
     tid = wp.tid()
@@ -2750,7 +2787,8 @@ def test_array_new_del(self):
 add_function_test(TestArray, "test_array_aliasing_from_numpy", test_array_aliasing_from_numpy, devices=["cpu"])
 add_function_test(TestArray, "test_numpy_array_interface", test_numpy_array_interface, devices=["cpu"])
-add_function_test(TestArray, "test_array_inplace_ops", test_array_inplace_ops, devices=devices)
+add_function_test(TestArray, "test_array_inplace_diff_ops", test_array_inplace_diff_ops, devices=devices)
+add_function_test(TestArray, "test_array_inplace_non_diff_ops", test_array_inplace_non_diff_ops, devices=devices)
 add_function_test(TestArray, "test_direct_from_numpy", test_direct_from_numpy, devices=["cpu"])
 add_function_test(TestArray, "test_kernel_array_from_ptr", test_kernel_array_from_ptr, devices=devices)
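
As a follow-up to the documentation note above: since in-place multiplication and division are not differentiable, a kernel that needs gradients can compute the product into a separate output array instead of updating the operand in place. The sketch below is a minimal illustration under the same setup as the documentation example; the kernel name ``mul_out`` and the extra ``out`` array are illustrative and not part of the change itself.

.. code-block:: python

    import warp as wp

    @wp.kernel
    def mul_out(a: wp.array(dtype=float), b: wp.array(dtype=float), out: wp.array(dtype=float)):
        i = wp.tid()

        # write the product to a separate array instead of a[i] *= b[i]
        out[i] = a[i] * b[i]


    a = wp.full(10, value=10.0, dtype=float, requires_grad=True)
    b = wp.full(10, value=2.0, dtype=float, requires_grad=True)
    out = wp.zeros(10, dtype=float, requires_grad=True)

    with wp.Tape() as tape:
        wp.launch(mul_out, a.shape, inputs=[a, b], outputs=[out])

    tape.backward(grads={out: wp.ones_like(out)})

    print(a.grad)  # d(out)/d(a) = b -> [2. 2. ...]
    print(b.grad)  # d(out)/d(b) = a -> [10. 10. ...]

Writing the result to a separate array avoids overwriting operand values that the backward pass still needs, which is what makes the in-place multiply and divide non-differentiable in the first place.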