How to define a custom_vjp for function that takes another function as an argument? #16540

mfkasim1 · 2023-06-23T17:11:42Z

mfkasim1
Jun 23, 2023

I have a function that takes another function as an argument and I want to define a custom vjp for that function. However, I don't know how to use it. Here is my code:

import jax
import jax.numpy as np
from functools import partial

# Define the function
# f applies the callable g to x; argument 0 (g) is marked non-differentiable.
@partial(jax.custom_vjp, nondiff_argnums=(0,))
def f(g, x):
    return g(x)

# Define the forward pass and backward pass (vjp)
# f_fwd returns the primal output y together with the residuals (g, x).
# The residuals contain the plain Python function g, which is the value the
# TypeError in the traceback reports as "not a valid JAX type".
def f_fwd(g, x):
    y = g(x)
    return y, (g, x)

# NOTE(review): with nondiff_argnums the backward rule presumably needs the
# signature f_bwd(g, res, g_bar) and should return one cotangent per
# differentiable argument only - confirm against the jax.custom_vjp docs.
# NOTE(review): g(x) * g_bar scales the cotangent by g's value, not by g's
# derivative; for square that is x**2 * g_bar rather than the 2 * x implied
# by the expected output on the last line.
def f_bwd(res, g_bar):
    g, x = res
    return (None, g(x) * g_bar)

# Associate them with the function
f.defvjp(f_fwd, f_bwd)

# Test it out
def square(x):
    return np.square(x)

# NOTE(review): f(square, x) is array-valued for this x, while jax.grad
# expects a scalar-valued function - confirm how the gradient should be taken.
x = np.array([2., 3., 4.])
print(jax.grad(f, argnums=1)(square, x))  # should print [4., 6., 8.]

When I run it, I get this error:

muhammad@mfk-a100:~/git/qpert/qpert$ python jaxexp.py 
Traceback (most recent call last):
  File "/home/muhammad/git/qpert/qpert/jaxexp.py", line 27, in <module>
    print(jax.grad(f, argnums=1)(square, x))  # should print [4., 6., 8.]
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/traceback_util.py", line 166, in reraise_with_filtered_traceback
    return fun(*args, **kwargs)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/api.py", line 642, in grad_f
    _, g = value_and_grad_f(*args, **kwargs)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/traceback_util.py", line 166, in reraise_with_filtered_traceback
    return fun(*args, **kwargs)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/api.py", line 718, in value_and_grad_f
    ans, vjp_py = _vjp(f_partial, *dyn_args, reduce_axes=reduce_axes)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/api.py", line 2174, in _vjp
    out_primal, out_vjp = ad.vjp(
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/interpreters/ad.py", line 139, in vjp
    out_primals, pvals, jaxpr, consts = linearize(traceable, *primals)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/interpreters/ad.py", line 128, in linearize
    jaxpr, out_pvals, consts = pe.trace_to_jaxpr_nounits(jvpfun_flat, in_pvals)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/profiler.py", line 314, in wrapper
    return func(*args, **kwargs)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/interpreters/partial_eval.py", line 777, in trace_to_jaxpr_nounits
    jaxpr, (out_pvals, consts, env) = fun.call_wrapped(pvals)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/linear_util.py", line 188, in call_wrapped
    ans = self.f(*args, **dict(self.params, **kwargs))
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/traceback_util.py", line 166, in reraise_with_filtered_traceback
    return fun(*args, **kwargs)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/custom_derivatives.py", line 614, in __call__
    out_flat = custom_vjp_call_p.bind(flat_fun, flat_fwd, flat_bwd,
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/custom_derivatives.py", line 763, in bind
    outs = top_trace.process_custom_vjp_call(self, fun, fwd, bwd_, tracers,
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/interpreters/ad.py", line 402, in process_custom_vjp_call
    tangents_out = custom_lin_p.bind(
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/core.py", line 380, in bind
    return self.bind_with_trace(find_top_trace(args), args, params)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/core.py", line 383, in bind_with_trace
    out = trace.process_primitive(self, map(trace.full_raise, args), params)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/interpreters/partial_eval.py", line 215, in process_primitive
    return self.default_process_primitive(primitive, tracers, params)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/interpreters/partial_eval.py", line 224, in default_process_primitive
    tracers = map(self.instantiate_const, tracers)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/interpreters/partial_eval.py", line 201, in instantiate_const
    return self.new_instantiated_const(const)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/interpreters/partial_eval.py", line 168, in new_instantiated_const
    aval = get_aval(val)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/core.py", line 1334, in get_aval
    return concrete_aval(x)
  File "/home/muhammad/anaconda3/envs/torch-dev/lib/python3.10/site-packages/jax/_src/core.py", line 1326, in concrete_aval
    raise TypeError(f"Value {repr(x)} with type {type(x)} is not a valid JAX "
jax._src.traceback_util.UnfilteredStackTrace: TypeError: Value <function square at 0x7f88c435f370> with type <class 'function'> is not a valid JAX type

The stack trace below excludes JAX-internal frames.
The preceding is the original exception that occurred, unmodified.

--------------------

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/muhammad/git/qpert/qpert/jaxexp.py", line 27, in <module>
    print(jax.grad(f, argnums=1)(square, x))  # should print [4., 6., 8.]
TypeError: Value <function square at 0x7f88c435f370> with type <class 'function'> is not a valid JAX type

Is there any way to define a custom_vjp for a function that takes another function as an argument?

Answered by jakevdp

Jun 23, 2023

Take a look at jax.custom_vjp with nondiff_argnums. It basically covers exactly this question, though the example is a bit misleading because it doesn't actually compute the gradient of the input function.

In the case of your example function, the full solution might look something like this:

import jax
import jax.numpy as jnp
from functools import partial

@partial(jax.custom_vjp, nondiff_argnums=(0,))
def f(g, x):
    return g(x)

def f_fwd(g, x):
    # Note: g_x is equivalent to f(g, x) here, but in general this will not be the case.
    g_x, g_vjp = jax.vjp(g, x)
    return f(g, x), (g_x, g_vjp)

def f_bwd(g, res, g_bar):
    # Note: g_x unneeded for this simple function f, but in gen…

View full answer

jakevdp · 2023-06-23T19:21:06Z

jakevdp
Jun 23, 2023
Maintainer

Take a look at jax.custom_vjp with nondiff_argnums. It basically covers exactly this question, though the example is a bit misleading because it doesn't actually compute the gradient of the input function.

In the case of your example function, the full solution might look something like this:

import jax
import jax.numpy as jnp
from functools import partial

@partial(jax.custom_vjp, nondiff_argnums=(0,))
def f(g, x):
    """Apply the callable ``g`` to ``x``.

    ``g`` is declared non-differentiable through ``nondiff_argnums=(0,)``,
    so the custom VJP rule only produces a cotangent for ``x``.
    """
    return g(x)

def f_fwd(g, x):
    """Forward rule: return the primal output and the residuals for ``f_bwd``.

    Returns:
        A pair ``(f(g, x), (g_x, g_vjp))`` where ``g_x`` and ``g_vjp`` are the
        output and the VJP function obtained from ``jax.vjp(g, x)``.
    """
    # Note: g_x is equivalent to f(g, x) here, but in general this will not be the case.
    g_x, g_vjp = jax.vjp(g, x)
    return f(g, x), (g_x, g_vjp)

def f_bwd(g, res, g_bar):
    """Backward rule: pull the output cotangent ``g_bar`` back through ``g``.

    Args:
        g: The non-differentiable callable, passed as the leading argument.
        res: The residuals ``(g_x, g_vjp)`` returned by ``f_fwd``.
        g_bar: Cotangent of the output of ``f``.

    Returns:
        The result of ``g_vjp(g_bar)``, i.e. the cotangent with respect to ``x``.
    """
    # Note: g_x unneeded for this simple function f, but in general the gradient will depend on it.
    g_x, g_vjp = res
    return g_vjp(g_bar)

f.defvjp(f_fwd, f_bwd)

def square(x):
    """Return the square of ``x`` computed with ``jnp.square``."""
    return jnp.square(x)

# Build the input with jnp: this snippet imports jax.numpy as jnp and never
# defines np, so np.array would raise NameError here.
x = jnp.array([2., 3., 4.])
print(jax.vmap(jax.grad(f, argnums=1), in_axes=(None, 0))(square, x))
# [4. 6. 8.]

(Note I had to use vmap(grad(f)) in the last line, because grad must have a scalar output).

2 replies

mfkasim1 Jun 23, 2023
Author

Thanks! Is there a reason why jax.vjp needs to be in f_fwd? I tried putting it in backward and it does not work:

import jax
import jax.numpy as jnp
from functools import partial

# f applies the callable g to x; argument 0 (g) is marked non-differentiable.
@partial(jax.custom_vjp, nondiff_argnums=(0,))
def f(g, x):
    return g(x)

def f_fwd(g, x):
    # The residuals here are (g, x): jax.vjp is no longer called in the forward
    # rule. g is a plain Python function, which is the value the TypeError at
    # the end of this snippet reports as not being a valid JAX type.
    return f(g, x), (g, x)

def f_bwd(g, res, g_bar):
    # Unpacking res rebinds the parameter g with the value stored in the
    # residuals; the VJP of g is then built here rather than in f_fwd.
    g, x = res
    g_x, g_vjp = jax.vjp(g, x)
    return g_vjp(g_bar)

f.defvjp(f_fwd, f_bwd)

def square(x):
    return jnp.square(x)

x = jnp.array([2., 3., 4.])
print(jax.vmap(jax.grad(f, argnums=1), in_axes=(None, 0))(square, x))
# TypeError: Value <function square at 0x7ff7e1e0b370> with type <class 'function'> is not a valid JAX type

jakevdp Jun 24, 2023
Maintainer

f_fwd must return a pytree of objects of valid JAX type. A plain function like g is neither a pytree nor a valid JAX type, so it can't be returned directly. If you want to do things this way, you could write f_fwd this way:

def f_fwd(g, x):
    """Forward rule that carries ``g`` itself in the residuals.

    ``g`` is wrapped in ``jax.tree_util.Partial`` before being returned,
    because a plain function cannot be returned from ``f_fwd`` directly.

    Returns:
        A pair ``(f(g, x), (Partial(g), x))``.
    """
    primal_out = f(g, x)
    wrapped_g = jax.tree_util.Partial(g)
    residuals = (wrapped_g, x)
    return primal_out, residuals

The reason the initial version works without Partial is that g_vjp (the function returned by jax.vjp) comes wrapped in Partial by default, because it's typically used in such a setting.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

How to define a custom_vjp for function that takes another function as an argument? #16540

{{title}}

Replies: 1 comment 2 replies

{{title}}

{{editor}}'s edit

{{editor}}'s edit

{{title}}

{{title}}

{{editor}}'s edit

{{editor}}'s edit

Select a reply

How to define a custom_vjp for function that takes another function as an argument? #16540

mfkasim1 Jun 23, 2023

Replies: 1 comment · 2 replies

jakevdp Jun 23, 2023 Maintainer

mfkasim1 Jun 23, 2023 Author

jakevdp Jun 24, 2023 Maintainer

mfkasim1
Jun 23, 2023

Replies: 1 comment 2 replies

jakevdp
Jun 23, 2023
Maintainer

mfkasim1 Jun 23, 2023
Author

jakevdp Jun 24, 2023
Maintainer