From 83789c509bda8ee3138564ee2c78ad3be37e0cb4 Mon Sep 17 00:00:00 2001 From: gmagogsfm Date: Thu, 10 Jul 2025 16:54:23 -0700 Subject: [PATCH] Remove references to TorchScript in docs --- advanced_source/cpp_export.rst | 392 +------ advanced_source/torch-script-parallelism.rst | 284 +---- .../torch_script_custom_classes.rst | 364 +----- .../CMakeLists.txt | 15 - .../custom_class_project/CMakeLists.txt | 10 - .../custom_class_project/class.cpp | 132 --- .../custom_class_project/custom_test.py | 54 - .../custom_class_project/export_attr.py | 21 - .../custom_class_project/save.py | 18 - .../torch_script_custom_classes/infer.cpp | 20 - .../torch_script_custom_classes/run.sh | 21 - .../torch_script_custom_classes/run2.sh | 13 - advanced_source/torch_script_custom_ops.rst | 1039 +---------------- .../torch_script_custom_ops/CMakeLists.txt | 14 - .../torch_script_custom_ops/op.cpp | 36 - .../torch_script_custom_ops/smoke_test.py | 3 - .../torch_script_custom_ops/test.py | 34 - .../Intro_to_TorchScript_tutorial.py | 400 ------- prototype_source/README.txt | 10 +- prototype_source/torchscript_freezing.py | 136 --- .../distributed_optim_torchscript.rst | 216 +--- recipes_source/torchscript_inference.rst | 202 +--- 22 files changed, 20 insertions(+), 3414 deletions(-) delete mode 100644 advanced_source/torch_script_custom_classes/CMakeLists.txt delete mode 100644 advanced_source/torch_script_custom_classes/custom_class_project/CMakeLists.txt delete mode 100644 advanced_source/torch_script_custom_classes/custom_class_project/class.cpp delete mode 100644 advanced_source/torch_script_custom_classes/custom_class_project/custom_test.py delete mode 100644 advanced_source/torch_script_custom_classes/custom_class_project/export_attr.py delete mode 100644 advanced_source/torch_script_custom_classes/custom_class_project/save.py delete mode 100644 advanced_source/torch_script_custom_classes/infer.cpp delete mode 100755 advanced_source/torch_script_custom_classes/run.sh delete mode 100755 advanced_source/torch_script_custom_classes/run2.sh delete mode 100644 advanced_source/torch_script_custom_ops/CMakeLists.txt delete mode 100644 advanced_source/torch_script_custom_ops/op.cpp delete mode 100644 advanced_source/torch_script_custom_ops/smoke_test.py delete mode 100644 advanced_source/torch_script_custom_ops/test.py delete mode 100644 beginner_source/Intro_to_TorchScript_tutorial.py delete mode 100644 prototype_source/torchscript_freezing.py diff --git a/advanced_source/cpp_export.rst b/advanced_source/cpp_export.rst index 286c79622dd..3d2042bd6b7 100644 --- a/advanced_source/cpp_export.rst +++ b/advanced_source/cpp_export.rst @@ -1,389 +1,3 @@ -Loading a TorchScript Model in C++ -===================================== - -.. warning:: TorchScript is no longer in active development. - -As its name suggests, the primary interface to PyTorch is the Python -programming language. While Python is a suitable and preferred language for -many scenarios requiring dynamism and ease of iteration, there are equally many -situations where precisely these properties of Python are unfavorable. One -environment in which the latter often applies is *production* -- the land of -low latencies and strict deployment requirements. For production scenarios, C++ -is very often the language of choice, even if only to bind it into another -language like Java, Rust or Go. 
The following paragraphs will outline the path -PyTorch provides to go from an existing Python model to a serialized -representation that can be *loaded* and *executed* purely from C++, with no -dependency on Python. - -Step 1: Converting Your PyTorch Model to Torch Script ------------------------------------------------------ - -A PyTorch model's journey from Python to C++ is enabled by `Torch Script -`_, a representation of a PyTorch -model that can be understood, compiled and serialized by the Torch Script -compiler. If you are starting out from an existing PyTorch model written in the -vanilla "eager" API, you must first convert your model to Torch Script. In the -most common cases, discussed below, this requires only little effort. If you -already have a Torch Script module, you can skip to the next section of this -tutorial. - -There exist two ways of converting a PyTorch model to Torch Script. The first -is known as *tracing*, a mechanism in which the structure of the model is -captured by evaluating it once using example inputs, and recording the flow of -those inputs through the model. This is suitable for models that make limited -use of control flow. The second approach is to add explicit annotations to your -model that inform the Torch Script compiler that it may directly parse and -compile your model code, subject to the constraints imposed by the Torch Script -language. - -.. tip:: - - You can find the complete documentation for both of these methods, as well as - further guidance on which to use, in the official `Torch Script - reference `_. - -Converting to Torch Script via Tracing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To convert a PyTorch model to Torch Script via tracing, you must pass an -instance of your model along with an example input to the ``torch.jit.trace`` -function. This will produce a ``torch.jit.ScriptModule`` object with the trace -of your model evaluation embedded in the module's ``forward`` method:: - - import torch - import torchvision - - # An instance of your model. - model = torchvision.models.resnet18() - - # An example input you would normally provide to your model's forward() method. - example = torch.rand(1, 3, 224, 224) - - # Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing. - traced_script_module = torch.jit.trace(model, example) - -The traced ``ScriptModule`` can now be evaluated identically to a regular -PyTorch module:: - - In[1]: output = traced_script_module(torch.ones(1, 3, 224, 224)) - In[2]: output[0, :5] - Out[2]: tensor([-0.2698, -0.0381, 0.4023, -0.3010, -0.0448], grad_fn=) - -Converting to Torch Script via Annotation -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Under certain circumstances, such as if your model employs particular forms of -control flow, you may want to write your model in Torch Script directly and -annotate your model accordingly. For example, say you have the following -vanilla Pytorch model:: - - import torch - - class MyModule(torch.nn.Module): - def __init__(self, N, M): - super(MyModule, self).__init__() - self.weight = torch.nn.Parameter(torch.rand(N, M)) - - def forward(self, input): - if input.sum() > 0: - output = self.weight.mv(input) - else: - output = self.weight + input - return output - - -Because the ``forward`` method of this module uses control flow that is -dependent on the input, it is not suitable for tracing. Instead, we can convert -it to a ``ScriptModule``. 
-In order to convert the module to a ``ScriptModule``, you need to
-compile the module with ``torch.jit.script`` as follows::
-
-    class MyModule(torch.nn.Module):
-        def __init__(self, N, M):
-            super(MyModule, self).__init__()
-            self.weight = torch.nn.Parameter(torch.rand(N, M))
-
-        def forward(self, input):
-            if input.sum() > 0:
-                output = self.weight.mv(input)
-            else:
-                output = self.weight + input
-            return output
-
-    my_module = MyModule(10, 20)
-    sm = torch.jit.script(my_module)
-
-If you need to exclude some methods in your ``nn.Module``
-because they use Python features that TorchScript doesn't support yet,
-you could annotate those with ``@torch.jit.ignore``.
-
-``sm`` is an instance of
-``ScriptModule`` that is ready for serialization.
-
-Step 2: Serializing Your Script Module to a File
--------------------------------------------------
-
-Once you have a ``ScriptModule`` in your hands, either from tracing or
-annotating a PyTorch model, you are ready to serialize it to a file. Later on,
-you'll be able to load the module from this file in C++ and execute it without
-any dependency on Python. Say we want to serialize the ``ResNet18`` model shown
-earlier in the tracing example. To perform this serialization, simply call
-`save <https://pytorch.org/docs/master/jit.html#torch.jit.ScriptModule.save>`_
-on the module and pass it a filename::
-
-    traced_script_module.save("traced_resnet_model.pt")
-
-This will produce a ``traced_resnet_model.pt`` file in your working directory.
-If you also would like to serialize ``sm``, call ``sm.save("my_module_model.pt")``.
-We have now officially left the realm of Python and are ready to cross over to the sphere
-of C++.
-
-Step 3: Loading Your Script Module in C++
-------------------------------------------
-
-To load your serialized PyTorch model in C++, your application must depend on
-the PyTorch C++ API -- also known as *LibTorch*. The LibTorch distribution
-encompasses a collection of shared libraries, header files and CMake build
-configuration files. While CMake is not a requirement for depending on
-LibTorch, it is the recommended approach and will be well supported into the
-future. For this tutorial, we will be building a minimal C++ application using
-CMake and LibTorch that simply loads and executes a serialized PyTorch model.
-
-A Minimal C++ Application
-^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Let's begin by discussing the code to load a module. The following will do:
-
-.. code-block:: cpp
-
-  #include <torch/script.h> // One-stop header.
-
-  #include <iostream>
-  #include <memory>
-
-  int main(int argc, const char* argv[]) {
-    if (argc != 2) {
-      std::cerr << "usage: example-app <path-to-exported-script-module>\n";
-      return -1;
-    }
-
-    torch::jit::script::Module module;
-    try {
-      // Deserialize the ScriptModule from a file using torch::jit::load().
-      module = torch::jit::load(argv[1]);
-    }
-    catch (const c10::Error& e) {
-      std::cerr << "error loading the model\n";
-      return -1;
-    }
-
-    std::cout << "ok\n";
-  }
-
-The ``<torch/script.h>`` header encompasses all relevant includes from the
-LibTorch library necessary to run the example. Our application accepts the file
-path to a serialized PyTorch ``ScriptModule`` as its only command line argument
-and then proceeds to deserialize the module using the ``torch::jit::load()``
-function, which takes this file path as input. In return we receive a ``torch::jit::script::Module``
-object. We will examine how to execute it in a moment.
-
-Depending on LibTorch and Building the Application
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Assume we stored the above code into a file called ``example-app.cpp``.
A -minimal ``CMakeLists.txt`` to build it could look as simple as: - -.. code-block:: cmake - - cmake_minimum_required(VERSION 3.0 FATAL_ERROR) - project(custom_ops) - - find_package(Torch REQUIRED) - - add_executable(example-app example-app.cpp) - target_link_libraries(example-app "${TORCH_LIBRARIES}") - set_property(TARGET example-app PROPERTY CXX_STANDARD 17) - -The last thing we need to build the example application is the LibTorch -distribution. You can always grab the latest stable release from the `download -page `_ on the PyTorch website. If you download and unzip -the latest archive, you should receive a folder with the following directory -structure: - -.. code-block:: sh - - libtorch/ - bin/ - include/ - lib/ - share/ - -- The ``lib/`` folder contains the shared libraries you must link against, -- The ``include/`` folder contains header files your program will need to include, -- The ``share/`` folder contains the necessary CMake configuration to enable the simple ``find_package(Torch)`` command above. - -.. tip:: - On Windows, debug and release builds are not ABI-compatible. If you plan to - build your project in debug mode, please try the debug version of LibTorch. - Also, make sure you specify the correct configuration in the ``cmake --build .`` - line below. - -The last step is building the application. For this, assume our example -directory is laid out like this: - -.. code-block:: sh - - example-app/ - CMakeLists.txt - example-app.cpp - -We can now run the following commands to build the application from within the -``example-app/`` folder: - -.. code-block:: sh - - mkdir build - cd build - cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. - cmake --build . --config Release - -where ``/path/to/libtorch`` should be the full path to the unzipped LibTorch -distribution. If all goes well, it will look something like this: - -.. code-block:: sh - - root@4b5a67132e81:/example-app# mkdir build - root@4b5a67132e81:/example-app# cd build - root@4b5a67132e81:/example-app/build# cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. 
- -- The C compiler identification is GNU 5.4.0 - -- The CXX compiler identification is GNU 5.4.0 - -- Check for working C compiler: /usr/bin/cc - -- Check for working C compiler: /usr/bin/cc -- works - -- Detecting C compiler ABI info - -- Detecting C compiler ABI info - done - -- Detecting C compile features - -- Detecting C compile features - done - -- Check for working CXX compiler: /usr/bin/c++ - -- Check for working CXX compiler: /usr/bin/c++ -- works - -- Detecting CXX compiler ABI info - -- Detecting CXX compiler ABI info - done - -- Detecting CXX compile features - -- Detecting CXX compile features - done - -- Looking for pthread.h - -- Looking for pthread.h - found - -- Looking for pthread_create - -- Looking for pthread_create - not found - -- Looking for pthread_create in pthreads - -- Looking for pthread_create in pthreads - not found - -- Looking for pthread_create in pthread - -- Looking for pthread_create in pthread - found - -- Found Threads: TRUE - -- Configuring done - -- Generating done - -- Build files have been written to: /example-app/build - root@4b5a67132e81:/example-app/build# make - Scanning dependencies of target example-app - [ 50%] Building CXX object CMakeFiles/example-app.dir/example-app.cpp.o - [100%] Linking CXX executable example-app - [100%] Built target example-app - -If we supply the path to the traced ``ResNet18`` model ``traced_resnet_model.pt`` we created earlier -to the resulting ``example-app`` binary, we should be rewarded with a friendly -"ok". Please note, if try to run this example with ``my_module_model.pt`` you will get an error saying that -your input is of an incompatible shape. ``my_module_model.pt`` expects 1D instead of 4D. - -.. code-block:: sh - - root@4b5a67132e81:/example-app/build# ./example-app /traced_resnet_model.pt - ok - -Step 4: Executing the Script Module in C++ ------------------------------------------- - -Having successfully loaded our serialized ``ResNet18`` in C++, we are now just a -couple lines of code away from executing it! Let's add those lines to our C++ -application's ``main()`` function: - -.. code-block:: cpp - - // Create a vector of inputs. - std::vector inputs; - inputs.push_back(torch::ones({1, 3, 224, 224})); - - // Execute the model and turn its output into a tensor. - at::Tensor output = module.forward(inputs).toTensor(); - std::cout << output.slice(/*dim=*/1, /*start=*/0, /*end=*/5) << '\n'; - -The first two lines set up the inputs to our model. We create a vector of -``torch::jit::IValue`` (a type-erased value type ``script::Module`` methods -accept and return) and add a single input. To create the input tensor, we use -``torch::ones()``, the equivalent to ``torch.ones`` in the C++ API. We then -run the ``script::Module``'s ``forward`` method, passing it the input vector we -created. In return we get a new ``IValue``, which we convert to a tensor by -calling ``toTensor()``. - -.. tip:: - - To learn more about functions like ``torch::ones`` and the PyTorch C++ API in - general, refer to its documentation at https://pytorch.org/cppdocs. The - PyTorch C++ API provides near feature parity with the Python API, allowing - you to further manipulate and process tensors just like in Python. - -In the last line, we print the first five entries of the output. Since we -supplied the same input to our model in Python earlier in this tutorial, we -should ideally see the same output. Let's try it out by re-compiling our -application and running it with the same serialized model: - -.. 
code-block:: sh - - root@4b5a67132e81:/example-app/build# make - Scanning dependencies of target example-app - [ 50%] Building CXX object CMakeFiles/example-app.dir/example-app.cpp.o - [100%] Linking CXX executable example-app - [100%] Built target example-app - root@4b5a67132e81:/example-app/build# ./example-app traced_resnet_model.pt - -0.2698 -0.0381 0.4023 -0.3010 -0.0448 - [ Variable[CPUFloatType]{1,5} ] - - -For reference, the output in Python previously was:: - - tensor([-0.2698, -0.0381, 0.4023, -0.3010, -0.0448], grad_fn=) - -Looks like a good match! - -.. tip:: - - To move your model to GPU memory, you can write ``model.to(at::kCUDA);``. - Make sure the inputs to a model are also living in CUDA memory - by calling ``tensor.to(at::kCUDA)``, which will return a new tensor in CUDA - memory. - -Step 5: Getting Help and Exploring the API ------------------------------------------- - -This tutorial has hopefully equipped you with a general understanding of a -PyTorch model's path from Python to C++. With the concepts described in this -tutorial, you should be able to go from a vanilla, "eager" PyTorch model, to a -compiled ``ScriptModule`` in Python, to a serialized file on disk and -- to -close the loop -- to an executable ``script::Module`` in C++. - -Of course, there are many concepts we did not cover. For example, you may find -yourself wanting to extend your ``ScriptModule`` with a custom operator -implemented in C++ or CUDA, and executing this custom operator inside your -``ScriptModule`` loaded in your pure C++ production environment. The good news -is: this is possible, and well supported! For now, you can explore `this -`_ folder -for examples, and we will follow up with a tutorial shortly. In the time being, -the following links may be generally helpful: - -- The Torch Script reference: https://pytorch.org/docs/master/jit.html -- The PyTorch C++ API documentation: https://pytorch.org/cppdocs/ -- The PyTorch Python API documentation: https://pytorch.org/docs/ - -As always, if you run into any problems or have questions, you can use our -`forum `_ or `GitHub issues -`_ to get in touch. +.. warning:: + TorchScript is deprecated, please use + `torch.export`_ instead. \ No newline at end of file diff --git a/advanced_source/torch-script-parallelism.rst b/advanced_source/torch-script-parallelism.rst index 09900dbf0d3..3d2042bd6b7 100644 --- a/advanced_source/torch-script-parallelism.rst +++ b/advanced_source/torch-script-parallelism.rst @@ -1,281 +1,3 @@ -Dynamic Parallelism in TorchScript -================================== - -.. warning:: TorchScript is no longer in active development. - -In this tutorial, we introduce the syntax for doing *dynamic inter-op parallelism* -in TorchScript. This parallelism has the following properties: - -* dynamic - The number of parallel tasks created and their workload can depend on the control flow of the program. -* inter-op - The parallelism is concerned with running TorchScript program fragments in parallel. This is distinct from *intra-op parallelism*, which is concerned with splitting up individual operators and running subsets of the operator's work in parallel. -Basic Syntax ------------- - -The two important APIs for dynamic parallelism are: - -* ``torch.jit.fork(fn : Callable[..., T], *args, **kwargs) -> torch.jit.Future[T]`` -* ``torch.jit.wait(fut : torch.jit.Future[T]) -> T`` - -A good way to demonstrate how these work is by way of an example: - -.. 
code-block:: python - - import torch - - def foo(x): - return torch.neg(x) - - @torch.jit.script - def example(x): - # Call `foo` using parallelism: - # First, we "fork" off a task. This task will run `foo` with argument `x` - future = torch.jit.fork(foo, x) - - # Call `foo` normally - x_normal = foo(x) - - # Second, we "wait" on the task. Since the task may be running in - # parallel, we have to "wait" for its result to become available. - # Notice that by having lines of code between the "fork()" and "wait()" - # call for a given Future, we can overlap computations so that they - # run in parallel. - x_parallel = torch.jit.wait(future) - - return x_normal, x_parallel - - print(example(torch.ones(1))) # (-1., -1.) - - -``fork()`` takes the callable ``fn`` and arguments to that callable ``args`` -and ``kwargs`` and creates an asynchronous task for the execution of ``fn``. -``fn`` can be a function, method, or Module instance. ``fork()`` returns a -reference to the value of the result of this execution, called a ``Future``. -Because ``fork`` returns immediately after creating the async task, ``fn`` may -not have been executed by the time the line of code after the ``fork()`` call -is executed. Thus, ``wait()`` is used to wait for the async task to complete -and return the value. - -These constructs can be used to overlap the execution of statements within a -function (shown in the worked example section) or be composed with other language -constructs like loops: - -.. code-block:: python - - import torch - from typing import List - - def foo(x): - return torch.neg(x) - - @torch.jit.script - def example(x): - futures : List[torch.jit.Future[torch.Tensor]] = [] - for _ in range(100): - futures.append(torch.jit.fork(foo, x)) - - results = [] - for future in futures: - results.append(torch.jit.wait(future)) - - return torch.sum(torch.stack(results)) - - print(example(torch.ones([]))) - -.. note:: - - When we initialized an empty list of Futures, we needed to add an explicit - type annotation to ``futures``. In TorchScript, empty containers default - to assuming they contain Tensor values, so we annotate the list constructor - # as being of type ``List[torch.jit.Future[torch.Tensor]]`` - -This example uses ``fork()`` to launch 100 instances of the function ``foo``, -waits on the 100 tasks to complete, then sums the results, returning ``-100.0``. - -Applied Example: Ensemble of Bidirectional LSTMs ------------------------------------------------- - -Let's try to apply parallelism to a more realistic example and see what sort -of performance we can get out of it. First, let's define the baseline model: an -ensemble of bidirectional LSTM layers. - -.. code-block:: python - - import torch, time - - # In RNN parlance, the dimensions we care about are: - # # of time-steps (T) - # Batch size (B) - # Hidden size/number of "channels" (C) - T, B, C = 50, 50, 1024 - - # A module that defines a single "bidirectional LSTM". This is simply two - # LSTMs applied to the same sequence, but one in reverse - class BidirectionalRecurrentLSTM(torch.nn.Module): - def __init__(self): - super().__init__() - self.cell_f = torch.nn.LSTM(input_size=C, hidden_size=C) - self.cell_b = torch.nn.LSTM(input_size=C, hidden_size=C) - - def forward(self, x : torch.Tensor) -> torch.Tensor: - # Forward layer - output_f, _ = self.cell_f(x) - - # Backward layer. 
Flip input in the time dimension (dim 0), apply the - # layer, then flip the outputs in the time dimension - x_rev = torch.flip(x, dims=[0]) - output_b, _ = self.cell_b(torch.flip(x, dims=[0])) - output_b_rev = torch.flip(output_b, dims=[0]) - - return torch.cat((output_f, output_b_rev), dim=2) - - - # An "ensemble" of `BidirectionalRecurrentLSTM` modules. The modules in the - # ensemble are run one-by-one on the same input then their results are - # stacked and summed together, returning the combined result. - class LSTMEnsemble(torch.nn.Module): - def __init__(self, n_models): - super().__init__() - self.n_models = n_models - self.models = torch.nn.ModuleList([ - BidirectionalRecurrentLSTM() for _ in range(self.n_models)]) - - def forward(self, x : torch.Tensor) -> torch.Tensor: - results = [] - for model in self.models: - results.append(model(x)) - return torch.stack(results).sum(dim=0) - - # For a head-to-head comparison to what we're going to do with fork/wait, let's - # instantiate the model and compile it with TorchScript - ens = torch.jit.script(LSTMEnsemble(n_models=4)) - - # Normally you would pull this input out of an embedding table, but for the - # purpose of this demo let's just use random data. - x = torch.rand(T, B, C) - - # Let's run the model once to warm up things like the memory allocator - ens(x) - - x = torch.rand(T, B, C) - - # Let's see how fast it runs! - s = time.time() - ens(x) - print('Inference took', time.time() - s, ' seconds') - -On my machine, this network runs in ``2.05`` seconds. We can do a lot better! - -Parallelizing Forward and Backward Layers ------------------------------------------ - -A very simple thing we can do is parallelize the forward and backward layers -within ``BidirectionalRecurrentLSTM``. For this, the structure of the computation -is static, so we don't actually even need any loops. Let's rewrite the ``forward`` -method of ``BidirectionalRecurrentLSTM`` like so: - -.. code-block:: python - - def forward(self, x : torch.Tensor) -> torch.Tensor: - # Forward layer - fork() so this can run in parallel to the backward - # layer - future_f = torch.jit.fork(self.cell_f, x) - - # Backward layer. Flip input in the time dimension (dim 0), apply the - # layer, then flip the outputs in the time dimension - x_rev = torch.flip(x, dims=[0]) - output_b, _ = self.cell_b(torch.flip(x, dims=[0])) - output_b_rev = torch.flip(output_b, dims=[0]) - - # Retrieve the output from the forward layer. Note this needs to happen - # *after* the stuff we want to parallelize with - output_f, _ = torch.jit.wait(future_f) - - return torch.cat((output_f, output_b_rev), dim=2) - -In this example, ``forward()`` delegates execution of ``cell_f`` to another thread, -while it continues to execute ``cell_b``. This causes the execution of both the -cells to be overlapped with each other. - -Running the script again with this simple modification yields a runtime of -``1.71`` seconds for an improvement of ``17%``! - -Aside: Visualizing Parallelism ------------------------------- - -We're not done optimizing our model but it's worth introducing the tooling we -have for visualizing performance. One important tool is the `PyTorch profiler `_. - -Let's use the profiler along with the Chrome trace export functionality to -visualize the performance of our parallelized model: - -.. code-block:: python - - with torch.autograd.profiler.profile() as prof: - ens(x) - prof.export_chrome_trace('parallel.json') - -This snippet of code will write out a file named ``parallel.json``. 
If you -navigate Google Chrome to ``chrome://tracing``, click the ``Load`` button, and -load in that JSON file, you should see a timeline like the following: - -.. image:: https://i.imgur.com/rm5hdG9.png - -The horizontal axis of the timeline represents time and the vertical axis -represents threads of execution. As we can see, we are running two ``lstm`` -instances at a time. This is the result of our hard work parallelizing the -bidirectional layers! - -Parallelizing Models in the Ensemble ------------------------------------- - -You may have noticed that there is a further parallelization opportunity in our -code: we can also run the models contained in ``LSTMEnsemble`` in parallel with -each other. The way to do that is simple enough, this is how we should change -the ``forward`` method of ``LSTMEnsemble``: - -.. code-block:: python - - def forward(self, x : torch.Tensor) -> torch.Tensor: - # Launch tasks for each model - futures : List[torch.jit.Future[torch.Tensor]] = [] - for model in self.models: - futures.append(torch.jit.fork(model, x)) - - # Collect the results from the launched tasks - results : List[torch.Tensor] = [] - for future in futures: - results.append(torch.jit.wait(future)) - - return torch.stack(results).sum(dim=0) - -Or, if you value brevity, we can use list comprehensions: - -.. code-block:: python - - def forward(self, x : torch.Tensor) -> torch.Tensor: - futures = [torch.jit.fork(model, x) for model in self.models] - results = [torch.jit.wait(fut) for fut in futures] - return torch.stack(results).sum(dim=0) - -Like described in the intro, we've used loops to fork off tasks for each of the -models in our ensemble. We've then used another loop to wait for all of the -tasks to be completed. This provides even more overlap of computation. - -With this small update, the script runs in ``1.4`` seconds, for a total speedup -of ``32%``! Pretty good for two lines of code. - -We can also use the Chrome tracer again to see where's going on: - -.. image:: https://i.imgur.com/kA0gyQm.png - -We can now see that all ``LSTM`` instances are being run fully in parallel. - -Conclusion ----------- - -In this tutorial, we learned about ``fork()`` and ``wait()``, the basic APIs -for doing dynamic, inter-op parallelism in TorchScript. We saw a few typical -usage patterns for using these functions to parallelize the execution of -functions, methods, or ``Modules`` in TorchScript code. Finally, we worked through -an example of optimizing a model using this technique and explored the performance -measurement and visualization tooling available in PyTorch. +.. warning:: + TorchScript is deprecated, please use + `torch.export`_ instead. \ No newline at end of file diff --git a/advanced_source/torch_script_custom_classes.rst b/advanced_source/torch_script_custom_classes.rst index 8586a032aae..3d2042bd6b7 100644 --- a/advanced_source/torch_script_custom_classes.rst +++ b/advanced_source/torch_script_custom_classes.rst @@ -1,361 +1,3 @@ -Extending TorchScript with Custom C++ Classes -=============================================== - -.. warning:: TorchScript is no longer in active development. - -This tutorial is a follow-on to the -:doc:`custom operator ` -tutorial, and introduces the API we've built for binding C++ classes into TorchScript -and Python simultaneously. The API is very similar to -`pybind11 `_, and most of the concepts will transfer -over if you're familiar with that system. 
- -Implementing and Binding the Class in C++ ------------------------------------------ - -For this tutorial, we are going to define a simple C++ class that maintains persistent -state in a member variable. - -.. literalinclude:: ../advanced_source/torch_script_custom_classes/custom_class_project/class.cpp - :language: cpp - :start-after: BEGIN class - :end-before: END class - -There are several things to note: - -- ``torch/custom_class.h`` is the header you need to include to extend TorchScript - with your custom class. -- Notice that whenever we are working with instances of the custom - class, we do it via instances of ``c10::intrusive_ptr<>``. Think of ``intrusive_ptr`` - as a smart pointer like ``std::shared_ptr``, but the reference count is stored - directly in the object, as opposed to a separate metadata block (as is done in - ``std::shared_ptr``. ``torch::Tensor`` internally uses the same pointer type; - and custom classes have to also use this pointer type so that we can - consistently manage different object types. -- The second thing to notice is that the user-defined class must inherit from - ``torch::CustomClassHolder``. This ensures that the custom class has space to - store the reference count. - -Now let's take a look at how we will make this class visible to TorchScript, a process called -*binding* the class: - -.. literalinclude:: ../advanced_source/torch_script_custom_classes/custom_class_project/class.cpp - :language: cpp - :start-after: BEGIN binding - :end-before: END binding - :append: - ; - } - - - -Building the Example as a C++ Project With CMake ------------------------------------------------- - -Now, we're going to build the above C++ code with the `CMake -`_ build system. First, take all the C++ code -we've covered so far and place it in a file called ``class.cpp``. -Then, write a simple ``CMakeLists.txt`` file and place it in the -same directory. Here is what ``CMakeLists.txt`` should look like: - -.. literalinclude:: ../advanced_source/torch_script_custom_classes/custom_class_project/CMakeLists.txt - :language: cmake - -Also, create a ``build`` directory. Your file tree should look like this:: - - custom_class_project/ - class.cpp - CMakeLists.txt - build/ - -We assume you've setup your environment in the same way as described in -the :doc:`previous tutorial `. -Go ahead and invoke cmake and then make to build the project: - -.. code-block:: shell - - $ cd build - $ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" .. 
- -- The C compiler identification is GNU 7.3.1 - -- The CXX compiler identification is GNU 7.3.1 - -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc - -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc -- works - -- Detecting C compiler ABI info - -- Detecting C compiler ABI info - done - -- Detecting C compile features - -- Detecting C compile features - done - -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++ - -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++ -- works - -- Detecting CXX compiler ABI info - -- Detecting CXX compiler ABI info - done - -- Detecting CXX compile features - -- Detecting CXX compile features - done - -- Looking for pthread.h - -- Looking for pthread.h - found - -- Looking for pthread_create - -- Looking for pthread_create - not found - -- Looking for pthread_create in pthreads - -- Looking for pthread_create in pthreads - not found - -- Looking for pthread_create in pthread - -- Looking for pthread_create in pthread - found - -- Found Threads: TRUE - -- Found torch: /torchbind_tutorial/libtorch/lib/libtorch.so - -- Configuring done - -- Generating done - -- Build files have been written to: /torchbind_tutorial/build - $ make -j - Scanning dependencies of target custom_class - [ 50%] Building CXX object CMakeFiles/custom_class.dir/class.cpp.o - [100%] Linking CXX shared library libcustom_class.so - [100%] Built target custom_class - -What you'll find is there is now (among other things) a dynamic library -file present in the build directory. On Linux, this is probably named -``libcustom_class.so``. So the file tree should look like:: - - custom_class_project/ - class.cpp - CMakeLists.txt - build/ - libcustom_class.so - -Using the C++ Class from Python and TorchScript ------------------------------------------------ - -Now that we have our class and its registration compiled into an ``.so`` file, -we can load that `.so` into Python and try it out. Here's a script that -demonstrates that: - -.. literalinclude:: ../advanced_source/torch_script_custom_classes/custom_class_project/custom_test.py - :language: python - - -Saving, Loading, and Running TorchScript Code Using Custom Classes ------------------------------------------------------------------- - -We can also use custom-registered C++ classes in a C++ process using -libtorch. As an example, let's define a simple ``nn.Module`` that -instantiates and calls a method on our MyStackClass class: - -.. literalinclude:: ../advanced_source/torch_script_custom_classes/custom_class_project/save.py - :language: python - -``foo.pt`` in our filesystem now contains the serialized TorchScript -program we've just defined. - -Now, we're going to define a new CMake project to show how you can load -this model and its required .so file. For a full treatment of how to do this, -please have a look at the `Loading a TorchScript Model in C++ Tutorial `_. - -Similarly to before, let's create a file structure containing the following:: - - cpp_inference_example/ - infer.cpp - CMakeLists.txt - foo.pt - build/ - custom_class_project/ - class.cpp - CMakeLists.txt - build/ - -Notice we've copied over the serialized ``foo.pt`` file, as well as the source -tree from the ``custom_class_project`` above. We will be adding the -``custom_class_project`` as a dependency to this C++ project so that we can -build the custom class into the binary. - -Let's populate ``infer.cpp`` with the following: - -.. 
literalinclude:: ../advanced_source/torch_script_custom_classes/infer.cpp - :language: cpp - -And similarly let's define our CMakeLists.txt file: - -.. literalinclude:: ../advanced_source/torch_script_custom_classes/CMakeLists.txt - :language: cpp - -You know the drill: ``cd build``, ``cmake``, and ``make``: - -.. code-block:: shell - - $ cd build - $ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" .. - -- The C compiler identification is GNU 7.3.1 - -- The CXX compiler identification is GNU 7.3.1 - -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc - -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc -- works - -- Detecting C compiler ABI info - -- Detecting C compiler ABI info - done - -- Detecting C compile features - -- Detecting C compile features - done - -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++ - -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++ -- works - -- Detecting CXX compiler ABI info - -- Detecting CXX compiler ABI info - done - -- Detecting CXX compile features - -- Detecting CXX compile features - done - -- Looking for pthread.h - -- Looking for pthread.h - found - -- Looking for pthread_create - -- Looking for pthread_create - not found - -- Looking for pthread_create in pthreads - -- Looking for pthread_create in pthreads - not found - -- Looking for pthread_create in pthread - -- Looking for pthread_create in pthread - found - -- Found Threads: TRUE - -- Found torch: /local/miniconda3/lib/python3.7/site-packages/torch/lib/libtorch.so - -- Configuring done - -- Generating done - -- Build files have been written to: /cpp_inference_example/build - $ make -j - Scanning dependencies of target custom_class - [ 25%] Building CXX object custom_class_project/CMakeFiles/custom_class.dir/class.cpp.o - [ 50%] Linking CXX shared library libcustom_class.so - [ 50%] Built target custom_class - Scanning dependencies of target infer - [ 75%] Building CXX object CMakeFiles/infer.dir/infer.cpp.o - [100%] Linking CXX executable infer - [100%] Built target infer - -And now we can run our exciting C++ binary: - -.. code-block:: shell - - $ ./infer - momfoobarbaz - -Incredible! - -Moving Custom Classes To/From IValues -------------------------------------- - -It's also possible that you may need to move custom classes into or out of -``IValue``s, such as when you take or return ``IValue``s from TorchScript methods -or you want to instantiate a custom class attribute in C++. For creating an -``IValue`` from a custom C++ class instance: - -- ``torch::make_custom_class()`` provides an API similar to c10::intrusive_ptr - in that it will take whatever set of arguments you provide to it, call the constructor - of T that matches that set of arguments, and wrap that instance up and return it. - However, instead of returning just a pointer to a custom class object, it returns - an ``IValue`` wrapping the object. You can then pass this ``IValue`` directly to - TorchScript. -- In the event that you already have an ``intrusive_ptr`` pointing to your class, you - can directly construct an IValue from it using the constructor ``IValue(intrusive_ptr)``. - -For converting ``IValue`` back to custom classes: - -- ``IValue::toCustomClass()`` will return an ``intrusive_ptr`` pointing to the - custom class that the ``IValue`` contains. 
Internally, this function is checking
-  that ``T`` is registered as a custom class and that the ``IValue`` does in fact contain
-  a custom class. You can check whether the ``IValue`` contains a custom class manually by
-  calling ``isCustomClass<T>()``.
-
-Defining Serialization/Deserialization Methods for Custom C++ Classes
-----------------------------------------------------------------------
-
-If you try to save a ``ScriptModule`` with a custom-bound C++ class as
-an attribute, you'll get the following error:
-
-.. literalinclude:: ../advanced_source/torch_script_custom_classes/custom_class_project/export_attr.py
-  :language: python
-
-.. code-block:: shell
-
-  $ python export_attr.py
-  RuntimeError: Cannot serialize custom bound C++ class __torch__.torch.classes.my_classes.MyStackClass. Please define serialization methods via def_pickle for this class. (pushIValueImpl at ../torch/csrc/jit/pickler.cpp:128)
-
-This is because TorchScript cannot automatically figure out what information
-to save from your C++ class. You must specify that manually. The way to do that
-is to define ``__getstate__`` and ``__setstate__`` methods on the class using
-the special ``def_pickle`` method on ``class_``.
-
-.. note::
-  The semantics of ``__getstate__`` and ``__setstate__`` in TorchScript are
-  equivalent to those of the Python pickle module. You can
-  `read more `_
-  about how we use these methods.
-
-Here is an example of the ``def_pickle`` call we can add to the registration of
-``MyStackClass`` to include serialization methods:
-
-.. literalinclude:: ../advanced_source/torch_script_custom_classes/custom_class_project/class.cpp
-  :language: cpp
-  :start-after: BEGIN def_pickle
-  :end-before: END def_pickle
-
-.. note::
-  We take a different approach from pybind11 in the pickle API. Whereas pybind11
-  has a special function ``pybind11::pickle()`` which you pass into ``class_::def()``,
-  we have a separate method ``def_pickle`` for this purpose. This is because the
-  name ``torch::jit::pickle`` was already taken, and we didn't want to cause confusion.
-
-Once we have defined the (de)serialization behavior in this way, our script can
-now run successfully:
-
-.. code-block:: shell
-
-  $ python ../export_attr.py
-  testing
-
-Defining Custom Operators that Take or Return Bound C++ Classes
----------------------------------------------------------------
-
-Once you've defined a custom C++ class, you can also use that class
-as an argument to or return value from a custom operator (i.e. free functions). Suppose
-you have the following free function:
-
-.. literalinclude:: ../advanced_source/torch_script_custom_classes/custom_class_project/class.cpp
-  :language: cpp
-  :start-after: BEGIN free_function
-  :end-before: END free_function
-
-You can register it by running the following code inside your ``TORCH_LIBRARY``
-block:
-
-.. literalinclude:: ../advanced_source/torch_script_custom_classes/custom_class_project/class.cpp
-  :language: cpp
-  :start-after: BEGIN def_free
-  :end-before: END def_free
-
-Refer to the `custom op tutorial `_
-for more details on the registration API.
-
-Once this is done, you can use the op like the following example:
-
-.. code-block:: python
-
-  class TryCustomOp(torch.nn.Module):
-      def __init__(self):
-          super(TryCustomOp, self).__init__()
-          self.f = torch.classes.my_classes.MyStackClass(["foo", "bar"])
-
-      def forward(self):
-          return torch.ops.my_classes.manipulate_instance(self.f)
-
-..
note:: - - Registration of an operator that takes a C++ class as an argument requires that - the custom class has already been registered. You can enforce this by - making sure the custom class registration and your free function definitions - are in the same ``TORCH_LIBRARY`` block, and that the custom class - registration comes first. In the future, we may relax this requirement, - so that these can be registered in any order. - - -Conclusion ----------- - -This tutorial walked you through how to expose a C++ class to TorchScript -(and by extension Python), how to register its methods, how to use that -class from Python and TorchScript, and how to save and load code using -the class and run that code in a standalone C++ process. You are now ready -to extend your TorchScript models with C++ classes that interface with -third party C++ libraries or implement any other use case that requires the -lines between Python, TorchScript and C++ to blend smoothly. - -As always, if you run into any problems or have questions, you can use our -`forum `_ or `GitHub issues -`_ to get in touch. Also, our -`frequently asked questions (FAQ) page -`_ may have helpful information. +.. warning:: + TorchScript is deprecated, please use + `torch.export`_ instead. \ No newline at end of file diff --git a/advanced_source/torch_script_custom_classes/CMakeLists.txt b/advanced_source/torch_script_custom_classes/CMakeLists.txt deleted file mode 100644 index 6a1eb3e87fa..00000000000 --- a/advanced_source/torch_script_custom_classes/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -project(infer) - -find_package(Torch REQUIRED) - -add_subdirectory(custom_class_project) - -# Define our library target -add_executable(infer infer.cpp) -set(CMAKE_CXX_STANDARD 14) -# Link against LibTorch -target_link_libraries(infer "${TORCH_LIBRARIES}") -# This is where we link in our libcustom_class code, making our -# custom class available in our binary. -target_link_libraries(infer -Wl,--no-as-needed custom_class) diff --git a/advanced_source/torch_script_custom_classes/custom_class_project/CMakeLists.txt b/advanced_source/torch_script_custom_classes/custom_class_project/CMakeLists.txt deleted file mode 100644 index bb3d41aa997..00000000000 --- a/advanced_source/torch_script_custom_classes/custom_class_project/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -project(custom_class) - -find_package(Torch REQUIRED) - -# Define our library target -add_library(custom_class SHARED class.cpp) -set(CMAKE_CXX_STANDARD 14) -# Link against LibTorch -target_link_libraries(custom_class "${TORCH_LIBRARIES}") diff --git a/advanced_source/torch_script_custom_classes/custom_class_project/class.cpp b/advanced_source/torch_script_custom_classes/custom_class_project/class.cpp deleted file mode 100644 index dc89a3ecb2e..00000000000 --- a/advanced_source/torch_script_custom_classes/custom_class_project/class.cpp +++ /dev/null @@ -1,132 +0,0 @@ -// BEGIN class -// This header is all you need to do the C++ portions of this -// tutorial -#include -// This header is what defines the custom class registration -// behavior specifically. script.h already includes this, but -// we include it here so you know it exists in case you want -// to look at the API or implementation. 
-#include - -#include -#include - -template -struct MyStackClass : torch::CustomClassHolder { - std::vector stack_; - MyStackClass(std::vector init) : stack_(init.begin(), init.end()) {} - - void push(T x) { - stack_.push_back(x); - } - T pop() { - auto val = stack_.back(); - stack_.pop_back(); - return val; - } - - c10::intrusive_ptr clone() const { - return c10::make_intrusive(stack_); - } - - void merge(const c10::intrusive_ptr& c) { - for (auto& elem : c->stack_) { - push(elem); - } - } -}; -// END class - -// BEGIN free_function -c10::intrusive_ptr> manipulate_instance(const c10::intrusive_ptr>& instance) { - instance->pop(); - return instance; -} -// END free_function - -// BEGIN binding -// Notice a few things: -// - We pass the class to be registered as a template parameter to -// `torch::class_`. In this instance, we've passed the -// specialization of the MyStackClass class ``MyStackClass``. -// In general, you cannot register a non-specialized template -// class. For non-templated classes, you can just pass the -// class name directly as the template parameter. -// - The arguments passed to the constructor make up the "qualified name" -// of the class. In this case, the registered class will appear in -// Python and C++ as `torch.classes.my_classes.MyStackClass`. We call -// the first argument the "namespace" and the second argument the -// actual class name. -TORCH_LIBRARY(my_classes, m) { - m.class_>("MyStackClass") - // The following line registers the contructor of our MyStackClass - // class that takes a single `std::vector` argument, - // i.e. it exposes the C++ method `MyStackClass(std::vector init)`. - // Currently, we do not support registering overloaded - // constructors, so for now you can only `def()` one instance of - // `torch::init`. - .def(torch::init>()) - // The next line registers a stateless (i.e. no captures) C++ lambda - // function as a method. Note that a lambda function must take a - // `c10::intrusive_ptr` (or some const/ref version of that) - // as the first argument. Other arguments can be whatever you want. - .def("top", [](const c10::intrusive_ptr>& self) { - return self->stack_.back(); - }) - // The following four lines expose methods of the MyStackClass - // class as-is. `torch::class_` will automatically examine the - // argument and return types of the passed-in method pointers and - // expose these to Python and TorchScript accordingly. Finally, notice - // that we must take the *address* of the fully-qualified method name, - // i.e. use the unary `&` operator, due to C++ typing rules. - .def("push", &MyStackClass::push) - .def("pop", &MyStackClass::pop) - .def("clone", &MyStackClass::clone) - .def("merge", &MyStackClass::merge) -// END binding -#ifndef NO_PICKLE -// BEGIN def_pickle - // class_<>::def_pickle allows you to define the serialization - // and deserialization methods for your C++ class. - // Currently, we only support passing stateless lambda functions - // as arguments to def_pickle - .def_pickle( - // __getstate__ - // This function defines what data structure should be produced - // when we serialize an instance of this class. The function - // must take a single `self` argument, which is an intrusive_ptr - // to the instance of the object. The function can return - // any type that is supported as a return value of the TorchScript - // custom operator API. In this instance, we've chosen to return - // a std::vector as the salient data to preserve - // from the class. 
- [](const c10::intrusive_ptr>& self) - -> std::vector { - return self->stack_; - }, - // __setstate__ - // This function defines how to create a new instance of the C++ - // class when we are deserializing. The function must take a - // single argument of the same type as the return value of - // `__getstate__`. The function must return an intrusive_ptr - // to a new instance of the C++ class, initialized however - // you would like given the serialized state. - [](std::vector state) - -> c10::intrusive_ptr> { - // A convenient way to instantiate an object and get an - // intrusive_ptr to it is via `make_intrusive`. We use - // that here to allocate an instance of MyStackClass - // and call the single-argument std::vector - // constructor with the serialized state. - return c10::make_intrusive>(std::move(state)); - }); -// END def_pickle -#endif // NO_PICKLE - -// BEGIN def_free - m.def( - "manipulate_instance(__torch__.torch.classes.my_classes.MyStackClass x) -> __torch__.torch.classes.my_classes.MyStackClass Y", - manipulate_instance - ); -// END def_free -} diff --git a/advanced_source/torch_script_custom_classes/custom_class_project/custom_test.py b/advanced_source/torch_script_custom_classes/custom_class_project/custom_test.py deleted file mode 100644 index e8c38638f6c..00000000000 --- a/advanced_source/torch_script_custom_classes/custom_class_project/custom_test.py +++ /dev/null @@ -1,54 +0,0 @@ -import torch - -# `torch.classes.load_library()` allows you to pass the path to your .so file -# to load it in and make the custom C++ classes available to both Python and -# TorchScript -torch.classes.load_library("build/libcustom_class.so") -# You can query the loaded libraries like this: -print(torch.classes.loaded_libraries) -# prints {'/custom_class_project/build/libcustom_class.so'} - -# We can find and instantiate our custom C++ class in python by using the -# `torch.classes` namespace: -# -# This instantiation will invoke the MyStackClass(std::vector init) -# constructor we registered earlier -s = torch.classes.my_classes.MyStackClass(["foo", "bar"]) - -# We can call methods in Python -s.push("pushed") -assert s.pop() == "pushed" - -# Test custom operator -s.push("pushed") -torch.ops.my_classes.manipulate_instance(s) # acting as s.pop() -assert s.top() == "bar" - -# Returning and passing instances of custom classes works as you'd expect -s2 = s.clone() -s.merge(s2) -for expected in ["bar", "foo", "bar", "foo"]: - assert s.pop() == expected - -# We can also use the class in TorchScript -# For now, we need to assign the class's type to a local in order to -# annotate the type on the TorchScript function. This may change -# in the future. 
-MyStackClass = torch.classes.my_classes.MyStackClass - - -@torch.jit.script -def do_stacks(s: MyStackClass): # We can pass a custom class instance - # We can instantiate the class - s2 = torch.classes.my_classes.MyStackClass(["hi", "mom"]) - s2.merge(s) # We can call a method on the class - # We can also return instances of the class - # from TorchScript function/methods - return s2.clone(), s2.top() - - -stack, top = do_stacks(torch.classes.my_classes.MyStackClass(["wow"])) -assert top == "wow" -for expected in ["wow", "mom", "hi"]: - assert stack.pop() == expected - diff --git a/advanced_source/torch_script_custom_classes/custom_class_project/export_attr.py b/advanced_source/torch_script_custom_classes/custom_class_project/export_attr.py deleted file mode 100644 index 9999d5c8183..00000000000 --- a/advanced_source/torch_script_custom_classes/custom_class_project/export_attr.py +++ /dev/null @@ -1,21 +0,0 @@ -# export_attr.py -import torch - -torch.classes.load_library('build/libcustom_class.so') - - -class Foo(torch.nn.Module): - def __init__(self): - super().__init__() - self.stack = torch.classes.my_classes.MyStackClass(["just", "testing"]) - - def forward(self, s: str) -> str: - return self.stack.pop() + s - - -scripted_foo = torch.jit.script(Foo()) - -scripted_foo.save('foo.pt') -loaded = torch.jit.load('foo.pt') - -print(loaded.stack.pop()) diff --git a/advanced_source/torch_script_custom_classes/custom_class_project/save.py b/advanced_source/torch_script_custom_classes/custom_class_project/save.py deleted file mode 100644 index 8826f95da7c..00000000000 --- a/advanced_source/torch_script_custom_classes/custom_class_project/save.py +++ /dev/null @@ -1,18 +0,0 @@ -import torch - -torch.classes.load_library('build/libcustom_class.so') - - -class Foo(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, s: str) -> str: - stack = torch.classes.my_classes.MyStackClass(["hi", "mom"]) - return stack.pop() + s - - -scripted_foo = torch.jit.script(Foo()) -print(scripted_foo.graph) - -scripted_foo.save('foo.pt') diff --git a/advanced_source/torch_script_custom_classes/infer.cpp b/advanced_source/torch_script_custom_classes/infer.cpp deleted file mode 100644 index 1ca5b002383..00000000000 --- a/advanced_source/torch_script_custom_classes/infer.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include - -#include -#include - -int main(int argc, const char* argv[]) { - torch::jit::Module module; - try { - // Deserialize the ScriptModule from a file using torch::jit::load(). - module = torch::jit::load("foo.pt"); - } - catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - return -1; - } - - std::vector inputs = {"foobarbaz"}; - auto output = module.forward(inputs).toString(); - std::cout << output->string() << std::endl; -} diff --git a/advanced_source/torch_script_custom_classes/run.sh b/advanced_source/torch_script_custom_classes/run.sh deleted file mode 100755 index 52c59581309..00000000000 --- a/advanced_source/torch_script_custom_classes/run.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -set -ex - -rm -rf build -rm -rf custom_class_project/build - -pushd custom_class_project - mkdir build - (cd build && cmake CXXFLAGS="-DNO_PICKLE" -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..) - (cd build && make) - python custom_test.py - python save.py - ! python export_attr.py -popd - -mkdir build -(cd build && cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..) 
-(cd build && make) -mv custom_class_project/foo.pt build/foo.pt -(cd build && ./infer) diff --git a/advanced_source/torch_script_custom_classes/run2.sh b/advanced_source/torch_script_custom_classes/run2.sh deleted file mode 100755 index d4ef0101a83..00000000000 --- a/advanced_source/torch_script_custom_classes/run2.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -set -ex - -rm -rf build -rm -rf custom_class_project/build - -pushd custom_class_project - mkdir build - (cd build && cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..) - (cd build && make) - python export_attr.py -popd diff --git a/advanced_source/torch_script_custom_ops.rst b/advanced_source/torch_script_custom_ops.rst index 0a0e6e2bd70..3d2042bd6b7 100644 --- a/advanced_source/torch_script_custom_ops.rst +++ b/advanced_source/torch_script_custom_ops.rst @@ -1,1038 +1,3 @@ -Extending TorchScript with Custom C++ Operators -=============================================== - .. warning:: - - This tutorial is deprecated as of PyTorch 2.4. Please see :ref:`custom-ops-landing-page` - for the newest up-to-date guides on PyTorch Custom Operators. - -The PyTorch 1.0 release introduced a new programming model to PyTorch called -`TorchScript `_. TorchScript is a -subset of the Python programming language which can be parsed, compiled and -optimized by the TorchScript compiler. Further, compiled TorchScript models have -the option of being serialized into an on-disk file format, which you can -subsequently load and run from pure C++ (as well as Python) for inference. - -TorchScript supports a large subset of operations provided by the ``torch`` -package, allowing you to express many kinds of complex models purely as a series -of tensor operations from PyTorch's "standard library". Nevertheless, there may -be times where you find yourself in need of extending TorchScript with a custom -C++ or CUDA function. While we recommend that you only resort to this option if -your idea cannot be expressed (efficiently enough) as a simple Python function, -we do provide a very friendly and simple interface for defining custom C++ and -CUDA kernels using `ATen `_, PyTorch's high -performance C++ tensor library. Once bound into TorchScript, you can embed these -custom kernels (or "ops") into your TorchScript model and execute them both in -Python and in their serialized form directly in C++. - -The following paragraphs give an example of writing a TorchScript custom op to -call into `OpenCV `_, a computer vision library written -in C++. We will discuss how to work with tensors in C++, how to efficiently -convert them to third party tensor formats (in this case, OpenCV ``Mat``), how -to register your operator with the TorchScript runtime and finally how to -compile the operator and use it in Python and C++. - -Implementing the Custom Operator in C++ ---------------------------------------- - -For this tutorial, we'll be exposing the `warpPerspective -`_ -function, which applies a perspective transformation to an image, from OpenCV to -TorchScript as a custom operator. The first step is to write the implementation -of our custom operator in C++. Let's call the file for this implementation -``op.cpp`` and make it look like this: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/op.cpp - :language: cpp - :start-after: BEGIN warp_perspective - :end-before: END warp_perspective - -The code for this operator is quite short. 
At the top of the file, we include -the OpenCV header file, ``opencv2/opencv.hpp``, alongside the ``torch/script.h`` -header which exposes all the necessary goodies from PyTorch's C++ API that we -need to write custom TorchScript operators. Our function ``warp_perspective`` -takes two arguments: an input ``image`` and the ``warp`` transformation matrix -we wish to apply to the image. The type of these inputs is ``torch::Tensor``, -PyTorch's tensor type in C++ (which is also the underlying type of all tensors -in Python). The return type of our ``warp_perspective`` function will also be a -``torch::Tensor``. - -.. tip:: - - See `this note `_ for - more information about ATen, the library that provides the ``Tensor`` class to - PyTorch. Further, `this tutorial - `_ describes how to - allocate and initialize new tensor objects in C++ (not required for this - operator). - -.. attention:: - - The TorchScript compiler understands a fixed number of types. Only these types - can be used as arguments to your custom operator. Currently these types are: - ``torch::Tensor``, ``torch::Scalar``, ``double``, ``int64_t`` and - ``std::vector`` s of these types. Note that *only* ``double`` and *not* - ``float``, and *only* ``int64_t`` and *not* other integral types such as - ``int``, ``short`` or ``long`` are supported. - -Inside of our function, the first thing we need to do is convert our PyTorch -tensors to OpenCV matrices, as OpenCV's ``warpPerspective`` expects ``cv::Mat`` -objects as inputs. Fortunately, there is a way to do this **without copying -any** data. In the first few lines, - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/op.cpp - :language: cpp - :start-after: BEGIN image_mat - :end-before: END image_mat - -we are calling `this constructor -`_ -of the OpenCV ``Mat`` class to convert our tensor to a ``Mat`` object. We pass -it the number of rows and columns of the original ``image`` tensor, the datatype -(which we'll fix as ``float32`` for this example), and finally a raw pointer to -the underlying data -- a ``float*``. What is special about this constructor of -the ``Mat`` class is that it does not copy the input data. Instead, it will -simply reference this memory for all operations performed on the ``Mat``. If an -in-place operation is performed on the ``image_mat``, this will be reflected in -the original ``image`` tensor (and vice-versa). This allows us to call -subsequent OpenCV routines with the library's native matrix type, even though -we're actually storing the data in a PyTorch tensor. We repeat this procedure to -convert the ``warp`` PyTorch tensor to the ``warp_mat`` OpenCV matrix: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/op.cpp - :language: cpp - :start-after: BEGIN warp_mat - :end-before: END warp_mat - -Next, we are ready to call the OpenCV function we were so eager to use in -TorchScript: ``warpPerspective``. For this, we pass the OpenCV function the -``image_mat`` and ``warp_mat`` matrices, as well as an empty output matrix -called ``output_mat``. We also specify the size ``dsize`` we want the output -matrix (image) to be. It is hardcoded to ``8 x 8`` for this example: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/op.cpp - :language: cpp - :start-after: BEGIN output_mat - :end-before: END output_mat - -The final step in our custom operator implementation is to convert the -``output_mat`` back into a PyTorch tensor, so that we can further use it in -PyTorch. 
This is strikingly similar to what we did earlier to convert in the -other direction. In this case, PyTorch provides a ``torch::from_blob`` method. A -*blob* in this case is intended to mean some opaque, flat pointer to memory that -we want to interpret as a PyTorch tensor. The call to ``torch::from_blob`` looks -like this: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/op.cpp - :language: cpp - :start-after: BEGIN output_tensor - :end-before: END output_tensor - -We use the ``.ptr()`` method on the OpenCV ``Mat`` class to get a raw -pointer to the underlying data (just like ``.data_ptr()`` for the PyTorch -tensor earlier). We also specify the output shape of the tensor, which we -hardcoded as ``8 x 8``. The output of ``torch::from_blob`` is then a -``torch::Tensor``, pointing to the memory owned by the OpenCV matrix. - -Before returning this tensor from our operator implementation, we must call -``.clone()`` on the tensor to perform a memory copy of the underlying data. The -reason for this is that ``torch::from_blob`` returns a tensor that does not own -its data. At that point, the data is still owned by the OpenCV matrix. However, -this OpenCV matrix will go out of scope and be deallocated at the end of the -function. If we returned the ``output`` tensor as-is, it would point to invalid -memory by the time we use it outside the function. Calling ``.clone()`` returns -a new tensor with a copy of the original data that the new tensor owns itself. -It is thus safe to return to the outside world. - -Registering the Custom Operator with TorchScript ------------------------------------------------- - -Now that have implemented our custom operator in C++, we need to *register* it -with the TorchScript runtime and compiler. This will allow the TorchScript -compiler to resolve references to our custom operator in TorchScript code. -If you have ever used the pybind11 library, our syntax for registration -resembles the pybind11 syntax very closely. To register a single function, -we write: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/op.cpp - :language: cpp - :start-after: BEGIN registry - :end-before: END registry - -somewhere at the top level of our ``op.cpp`` file. The ``TORCH_LIBRARY`` macro -creates a function that will be called when your program starts. The name -of your library (``my_ops``) is given as the first argument (it should not -be in quotes). The second argument (``m``) defines a variable of type -``torch::Library`` which is the main interface to register your operators. -The method ``Library::def`` actually creates an operator named ``warp_perspective``, -exposing it to both Python and TorchScript. You can define as many operators -as you like by making multiple calls to ``def``. - -Behinds the scenes, the ``def`` function is actually doing quite a bit of work: -it is using template metaprogramming to inspect the type signature of your -function and translate it into an operator schema which specifies the operators -type within TorchScript's type system. - -Building the Custom Operator ----------------------------- - -Now that we have implemented our custom operator in C++ and written its -registration code, it is time to build the operator into a (shared) library that -we can load into Python for research and experimentation, or into C++ for -inference in a no-Python environment. There exist multiple ways to build our -operator, using either pure CMake, or Python alternatives like ``setuptools``. 
-For brevity, the paragraphs below only discuss the CMake approach. The appendix -of this tutorial dives into other alternatives. - -Environment setup -***************** - -We need an installation of PyTorch and OpenCV. The easiest and most platform -independent way to get both is to via Conda:: - - conda install -c pytorch pytorch - conda install opencv - -Building with CMake -******************* - -To build our custom operator into a shared library using the `CMake -`_ build system, we need to write a short ``CMakeLists.txt`` -file and place it with our previous ``op.cpp`` file. For this, let's agree on a -a directory structure that looks like this:: - - warp-perspective/ - op.cpp - CMakeLists.txt - -The contents of our ``CMakeLists.txt`` file should then be the following: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/CMakeLists.txt - :language: cpp - -To now build our operator, we can run the following commands from our -``warp_perspective`` folder: - -.. code-block:: shell - - $ mkdir build - $ cd build - $ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" .. - -- The C compiler identification is GNU 5.4.0 - -- The CXX compiler identification is GNU 5.4.0 - -- Check for working C compiler: /usr/bin/cc - -- Check for working C compiler: /usr/bin/cc -- works - -- Detecting C compiler ABI info - -- Detecting C compiler ABI info - done - -- Detecting C compile features - -- Detecting C compile features - done - -- Check for working CXX compiler: /usr/bin/c++ - -- Check for working CXX compiler: /usr/bin/c++ -- works - -- Detecting CXX compiler ABI info - -- Detecting CXX compiler ABI info - done - -- Detecting CXX compile features - -- Detecting CXX compile features - done - -- Looking for pthread.h - -- Looking for pthread.h - found - -- Looking for pthread_create - -- Looking for pthread_create - not found - -- Looking for pthread_create in pthreads - -- Looking for pthread_create in pthreads - not found - -- Looking for pthread_create in pthread - -- Looking for pthread_create in pthread - found - -- Found Threads: TRUE - -- Found torch: /libtorch/lib/libtorch.so - -- Configuring done - -- Generating done - -- Build files have been written to: /warp_perspective/build - $ make -j - Scanning dependencies of target warp_perspective - [ 50%] Building CXX object CMakeFiles/warp_perspective.dir/op.cpp.o - [100%] Linking CXX shared library libwarp_perspective.so - [100%] Built target warp_perspective - -which will place a ``libwarp_perspective.so`` shared library file in the -``build`` folder. In the ``cmake`` command above, we use the helper -variable ``torch.utils.cmake_prefix_path`` to conveniently tell us where -the cmake files for our PyTorch install are. - -We will explore how to use and call our operator in detail further below, but to -get an early sensation of success, we can try running the following code in -Python: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/smoke_test.py - :language: python - -If all goes well, this should print something like:: - - - -which is the Python function we will later use to invoke our custom operator. - -Using the TorchScript Custom Operator in Python ------------------------------------------------ - -Once our custom operator is built into a shared library we are ready to use -this operator in our TorchScript models in Python. There are two parts to this: -first loading the operator into Python, and second using the operator in -TorchScript code. 
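As a quick, minimal sketch of both steps (mirroring the smoke test and ``test.py`` included in this patch, and assuming the shared library was built into ``build/libwarp_perspective.so`` by the CMake steps above), the flow might look like this:

.. code-block:: python

    import torch

    # Step 1: load the shared library; this runs the TORCH_LIBRARY block
    # and registers my_ops::warp_perspective with PyTorch.
    torch.ops.load_library("build/libwarp_perspective.so")

    # Step 2: call the operator, here simply in eager mode on regular tensors.
    image = torch.randn(32, 32)
    warp = torch.eye(3)
    output = torch.ops.my_ops.warp_perspective(image, warp)
    print(output.shape)  # torch.Size([8, 8]), since the op hardcodes an 8 x 8 output

Each of these two parts is discussed in turn below.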
- -You already saw how to import your operator into Python: -``torch.ops.load_library()``. This function takes the path to a shared library -containing custom operators, and loads it into the current process. Loading the -shared library will also execute the ``TORCH_LIBRARY`` block. This will register -our custom operator with the TorchScript compiler and allow us to use that -operator in TorchScript code. - -You can refer to your loaded operator as ``torch.ops..``, -where ```` is the namespace part of your operator name, and -```` the function name of your operator. For the operator we wrote -above, the namespace was ``my_ops`` and the function name ``warp_perspective``, -which means our operator is available as ``torch.ops.my_ops.warp_perspective``. -While this function can be used in scripted or traced TorchScript modules, we -can also just use it in vanilla eager PyTorch and pass it regular PyTorch -tensors: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/test.py - :language: python - :prepend: import torch - :start-after: BEGIN preamble - :end-before: END preamble - -producing: - -.. code-block:: python - - tensor([[0.0000, 0.3218, 0.4611, ..., 0.4636, 0.4636, 0.4636], - [0.3746, 0.0978, 0.5005, ..., 0.4636, 0.4636, 0.4636], - [0.3245, 0.0169, 0.0000, ..., 0.4458, 0.4458, 0.4458], - ..., - [0.1862, 0.1862, 0.1692, ..., 0.0000, 0.0000, 0.0000], - [0.1862, 0.1862, 0.1692, ..., 0.0000, 0.0000, 0.0000], - [0.1862, 0.1862, 0.1692, ..., 0.0000, 0.0000, 0.0000]]) - - -.. note:: - - What happens behind the scenes is that the first time you access - ``torch.ops.namespace.function`` in Python, the TorchScript compiler (in C++ - land) will see if a function ``namespace::function`` has been registered, and - if so, return a Python handle to this function that we can subsequently use to - call into our C++ operator implementation from Python. This is one noteworthy - difference between TorchScript custom operators and C++ extensions: C++ - extensions are bound manually using pybind11, while TorchScript custom ops are - bound on the fly by PyTorch itself. Pybind11 gives you more flexibility with - regards to what types and classes you can bind into Python and is thus - recommended for purely eager code, but it is not supported for TorchScript - ops. - -From here on, you can use your custom operator in scripted or traced code just -as you would other functions from the ``torch`` package. In fact, "standard -library" functions like ``torch.matmul`` go through largely the same -registration path as custom operators, which makes custom operators really -first-class citizens when it comes to how and where they can be used in -TorchScript. (One difference, however, is that standard library functions -have custom written Python argument parsing logic that differs from -``torch.ops`` argument parsing.) - -Using the Custom Operator with Tracing -************************************** - -Let's start by embedding our operator in a traced function. Recall that for -tracing, we start with some vanilla Pytorch code: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/test.py - :language: python - :start-after: BEGIN compute - :end-before: END compute - -and then call ``torch.jit.trace`` on it. We further pass ``torch.jit.trace`` -some example inputs, which it will forward to our implementation to record the -sequence of operations that occur as the inputs flow through it. 
The result of -this is effectively a "frozen" version of the eager PyTorch program, which the -TorchScript compiler can further analyze, optimize and serialize: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/test.py - :language: python - :start-after: BEGIN trace - :end-before: END trace - -Producing:: - - graph(%x : Float(4:8, 8:1), - %y : Float(8:5, 5:1), - %z : Float(4:5, 5:1)): - %3 : Float(4:5, 5:1) = aten::matmul(%x, %y) # test.py:10:0 - %4 : Float(4:5, 5:1) = aten::relu(%z) # test.py:10:0 - %5 : int = prim::Constant[value=1]() # test.py:10:0 - %6 : Float(4:5, 5:1) = aten::add(%3, %4, %5) # test.py:10:0 - return (%6) - -Now, the exciting revelation is that we can simply drop our custom operator into -our PyTorch trace as if it were ``torch.relu`` or any other ``torch`` function: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/test.py - :language: python - :start-after: BEGIN compute2 - :end-before: END compute2 - -and then trace it as before: - -.. literalinclude:: ../advanced_source/torch_script_custom_ops/test.py - :language: python - :start-after: BEGIN trace2 - :end-before: END trace2 - -Producing:: - - graph(%x.1 : Float(4:8, 8:1), - %y : Float(8:5, 5:1), - %z : Float(8:5, 5:1)): - %3 : int = prim::Constant[value=3]() # test.py:25:0 - %4 : int = prim::Constant[value=6]() # test.py:25:0 - %5 : int = prim::Constant[value=0]() # test.py:25:0 - %6 : Device = prim::Constant[value="cpu"]() # test.py:25:0 - %7 : bool = prim::Constant[value=0]() # test.py:25:0 - %8 : Float(3:3, 3:1) = aten::eye(%3, %4, %5, %6, %7) # test.py:25:0 - %x : Float(8:8, 8:1) = my_ops::warp_perspective(%x.1, %8) # test.py:25:0 - %10 : Float(8:5, 5:1) = aten::matmul(%x, %y) # test.py:26:0 - %11 : Float(8:5, 5:1) = aten::relu(%z) # test.py:26:0 - %12 : int = prim::Constant[value=1]() # test.py:26:0 - %13 : Float(8:5, 5:1) = aten::add(%10, %11, %12) # test.py:26:0 - return (%13) - -Integrating TorchScript custom ops into traced PyTorch code is as easy as this! - -Using the Custom Operator with Script -************************************* - -Besides tracing, another way to arrive at a TorchScript representation of a -PyTorch program is to directly write your code *in* TorchScript. TorchScript is -largely a subset of the Python language, with some restrictions that make it -easier for the TorchScript compiler to reason about programs. You turn your -regular PyTorch code into TorchScript by annotating it with -``@torch.jit.script`` for free functions and ``@torch.jit.script_method`` for -methods in a class (which must also derive from ``torch.jit.ScriptModule``). See -`here `_ for more details on -TorchScript annotations. - -One particular reason to use TorchScript instead of tracing is that tracing is -unable to capture control flow in PyTorch code. As such, let us consider this -function which does use control flow: - -.. code-block:: python - - def compute(x, y): - if bool(x[0][0] == 42): - z = 5 - else: - z = 10 - return x.matmul(y) + z - -To convert this function from vanilla PyTorch to TorchScript, we annotate it -with ``@torch.jit.script``: - -.. code-block:: python - - @torch.jit.script - def compute(x, y): - if bool(x[0][0] == 42): - z = 5 - else: - z = 10 - return x.matmul(y) + z - -This will just-in-time compile the ``compute`` function into a graph -representation, which we can inspect in the ``compute.graph`` property: - -.. 
code-block:: python - - >>> compute.graph - graph(%x : Dynamic - %y : Dynamic) { - %14 : int = prim::Constant[value=1]() - %2 : int = prim::Constant[value=0]() - %7 : int = prim::Constant[value=42]() - %z.1 : int = prim::Constant[value=5]() - %z.2 : int = prim::Constant[value=10]() - %4 : Dynamic = aten::select(%x, %2, %2) - %6 : Dynamic = aten::select(%4, %2, %2) - %8 : Dynamic = aten::eq(%6, %7) - %9 : bool = prim::TensorToBool(%8) - %z : int = prim::If(%9) - block0() { - -> (%z.1) - } - block1() { - -> (%z.2) - } - %13 : Dynamic = aten::matmul(%x, %y) - %15 : Dynamic = aten::add(%13, %z, %14) - return (%15); - } - -And now, just like before, we can use our custom operator like any other -function inside of our script code: - -.. code-block:: python - - torch.ops.load_library("libwarp_perspective.so") - - @torch.jit.script - def compute(x, y): - if bool(x[0] == 42): - z = 5 - else: - z = 10 - x = torch.ops.my_ops.warp_perspective(x, torch.eye(3)) - return x.matmul(y) + z - -When the TorchScript compiler sees the reference to -``torch.ops.my_ops.warp_perspective``, it will find the implementation we -registered via the ``TORCH_LIBRARY`` function in C++, and compile it into its -graph representation: - -.. code-block:: python - - >>> compute.graph - graph(%x.1 : Dynamic - %y : Dynamic) { - %20 : int = prim::Constant[value=1]() - %16 : int[] = prim::Constant[value=[0, -1]]() - %14 : int = prim::Constant[value=6]() - %2 : int = prim::Constant[value=0]() - %7 : int = prim::Constant[value=42]() - %z.1 : int = prim::Constant[value=5]() - %z.2 : int = prim::Constant[value=10]() - %13 : int = prim::Constant[value=3]() - %4 : Dynamic = aten::select(%x.1, %2, %2) - %6 : Dynamic = aten::select(%4, %2, %2) - %8 : Dynamic = aten::eq(%6, %7) - %9 : bool = prim::TensorToBool(%8) - %z : int = prim::If(%9) - block0() { - -> (%z.1) - } - block1() { - -> (%z.2) - } - %17 : Dynamic = aten::eye(%13, %14, %2, %16) - %x : Dynamic = my_ops::warp_perspective(%x.1, %17) - %19 : Dynamic = aten::matmul(%x, %y) - %21 : Dynamic = aten::add(%19, %z, %20) - return (%21); - } - -Notice in particular the reference to ``my_ops::warp_perspective`` at the end of -the graph. - -.. attention:: - - The TorchScript graph representation is still subject to change. Do not rely - on it looking like this. - -And that's really it when it comes to using our custom operator in Python. In -short, you import the library containing your operator(s) using -``torch.ops.load_library``, and call your custom op like any other ``torch`` -operator from your traced or scripted TorchScript code. - -Using the TorchScript Custom Operator in C++ --------------------------------------------- - -One useful feature of TorchScript is the ability to serialize a model into an -on-disk file. This file can be sent over the wire, stored in a file system or, -more importantly, be dynamically deserialized and executed without needing to -keep the original source code around. This is possible in Python, but also in -C++. For this, PyTorch provides `a pure C++ API `_ -for deserializing as well as executing TorchScript models. If you haven't yet, -please read `the tutorial on loading and running serialized TorchScript models -in C++ `_, on which the -next few paragraphs will build. - -In short, custom operators can be executed just like regular ``torch`` operators -even when deserialized from a file and run in C++. The only requirement for this -is to link the custom operator shared library we built earlier with the C++ -application in which we execute the model. 
In Python, this worked simply calling -``torch.ops.load_library``. In C++, you need to link the shared library with -your main application in whatever build system you are using. The following -example will showcase this using CMake. - -.. note:: - - Technically, you can also dynamically load the shared library into your C++ - application at runtime in much the same way we did it in Python. On Linux, - `you can do this with dlopen - `_. There exist - equivalents on other platforms. - -Building on the C++ execution tutorial linked above, let's start with a minimal -C++ application in one file, ``main.cpp`` in a different folder from our -custom operator, that loads and executes a serialized TorchScript model: - -.. code-block:: cpp - - #include // One-stop header. - - #include - #include - - - int main(int argc, const char* argv[]) { - if (argc != 2) { - std::cerr << "usage: example-app \n"; - return -1; - } - - // Deserialize the ScriptModule from a file using torch::jit::load(). - torch::jit::script::Module module = torch::jit::load(argv[1]); - - std::vector inputs; - inputs.push_back(torch::randn({4, 8})); - inputs.push_back(torch::randn({8, 5})); - - torch::Tensor output = module.forward(std::move(inputs)).toTensor(); - - std::cout << output << std::endl; - } - -Along with a small ``CMakeLists.txt`` file: - -.. code-block:: cmake - - cmake_minimum_required(VERSION 3.1 FATAL_ERROR) - project(example_app) - - find_package(Torch REQUIRED) - - add_executable(example_app main.cpp) - target_link_libraries(example_app "${TORCH_LIBRARIES}") - target_compile_features(example_app PRIVATE cxx_range_for) - -At this point, we should be able to build the application: - -.. code-block:: shell - - $ mkdir build - $ cd build - $ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" .. - -- The C compiler identification is GNU 5.4.0 - -- The CXX compiler identification is GNU 5.4.0 - -- Check for working C compiler: /usr/bin/cc - -- Check for working C compiler: /usr/bin/cc -- works - -- Detecting C compiler ABI info - -- Detecting C compiler ABI info - done - -- Detecting C compile features - -- Detecting C compile features - done - -- Check for working CXX compiler: /usr/bin/c++ - -- Check for working CXX compiler: /usr/bin/c++ -- works - -- Detecting CXX compiler ABI info - -- Detecting CXX compiler ABI info - done - -- Detecting CXX compile features - -- Detecting CXX compile features - done - -- Looking for pthread.h - -- Looking for pthread.h - found - -- Looking for pthread_create - -- Looking for pthread_create - not found - -- Looking for pthread_create in pthreads - -- Looking for pthread_create in pthreads - not found - -- Looking for pthread_create in pthread - -- Looking for pthread_create in pthread - found - -- Found Threads: TRUE - -- Found torch: /libtorch/lib/libtorch.so - -- Configuring done - -- Generating done - -- Build files have been written to: /example_app/build - $ make -j - Scanning dependencies of target example_app - [ 50%] Building CXX object CMakeFiles/example_app.dir/main.cpp.o - [100%] Linking CXX executable example_app - [100%] Built target example_app - -And run it without passing a model just yet: - -.. code-block:: shell - - $ ./example_app - usage: example_app - -Next, let's serialize the script function we wrote earlier that uses our custom -operator: - -.. 
code-block:: python - - torch.ops.load_library("libwarp_perspective.so") - - @torch.jit.script - def compute(x, y): - if bool(x[0][0] == 42): - z = 5 - else: - z = 10 - x = torch.ops.my_ops.warp_perspective(x, torch.eye(3)) - return x.matmul(y) + z - - compute.save("example.pt") - -The last line will serialize the script function into a file called -"example.pt". If we then pass this serialized model to our C++ application, we -can run it straight away: - -.. code-block:: shell - - $ ./example_app example.pt - terminate called after throwing an instance of 'torch::jit::script::ErrorReport' - what(): - Schema not found for node. File a bug report. - Node: %16 : Dynamic = my_ops::warp_perspective(%0, %19) - -Or maybe not. Maybe not just yet. Of course! We haven't linked the custom -operator library with our application yet. Let's do this right now, and to do it -properly let's update our file organization slightly, to look like this:: - - example_app/ - CMakeLists.txt - main.cpp - warp_perspective/ - CMakeLists.txt - op.cpp - -This will allow us to add the ``warp_perspective`` library CMake target as a -subdirectory of our application target. The top level ``CMakeLists.txt`` in the -``example_app`` folder should look like this: - -.. code-block:: cmake - - cmake_minimum_required(VERSION 3.1 FATAL_ERROR) - project(example_app) - - find_package(Torch REQUIRED) - - add_subdirectory(warp_perspective) - - add_executable(example_app main.cpp) - target_link_libraries(example_app "${TORCH_LIBRARIES}") - target_link_libraries(example_app -Wl,--no-as-needed warp_perspective) - target_compile_features(example_app PRIVATE cxx_range_for) - -This basic CMake configuration looks much like before, except that we add the -``warp_perspective`` CMake build as a subdirectory. Once its CMake code runs, we -link our ``example_app`` application with the ``warp_perspective`` shared -library. - -.. attention:: - - There is one crucial detail embedded in the above example: The - ``-Wl,--no-as-needed`` prefix to the ``warp_perspective`` link line. This is - required because we will not actually be calling any function from the - ``warp_perspective`` shared library in our application code. We only need the - ``TORCH_LIBRARY`` function to run. Inconveniently, this - confuses the linker and makes it think it can just skip linking against the - library altogether. On Linux, the ``-Wl,--no-as-needed`` flag forces the link - to happen (NB: this flag is specific to Linux!). There are other workarounds - for this. The simplest is to define *some function* in the operator library - that you need to call from the main application. This could be as simple as a - function ``void init();`` declared in some header, which is then defined as - ``void init() { }`` in the operator library. Calling this ``init()`` function - in the main application will give the linker the impression that this is a - library worth linking against. Unfortunately, this is outside of our control, - and we would rather let you know the reason and the simple workaround for this - than handing you some opaque macro to plop in your code. - -Now, since we find the ``Torch`` package at the top level now, the -``CMakeLists.txt`` file in the ``warp_perspective`` subdirectory can be -shortened a bit. It should look like this: - -.. 
code-block:: cmake - - find_package(OpenCV REQUIRED) - add_library(warp_perspective SHARED op.cpp) - target_compile_features(warp_perspective PRIVATE cxx_range_for) - target_link_libraries(warp_perspective PRIVATE "${TORCH_LIBRARIES}") - target_link_libraries(warp_perspective PRIVATE opencv_core opencv_photo) - -Let's re-build our example app, which will also link with the custom operator -library. In the top level ``example_app`` directory: - -.. code-block:: shell - - $ mkdir build - $ cd build - $ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" .. - -- The C compiler identification is GNU 5.4.0 - -- The CXX compiler identification is GNU 5.4.0 - -- Check for working C compiler: /usr/bin/cc - -- Check for working C compiler: /usr/bin/cc -- works - -- Detecting C compiler ABI info - -- Detecting C compiler ABI info - done - -- Detecting C compile features - -- Detecting C compile features - done - -- Check for working CXX compiler: /usr/bin/c++ - -- Check for working CXX compiler: /usr/bin/c++ -- works - -- Detecting CXX compiler ABI info - -- Detecting CXX compiler ABI info - done - -- Detecting CXX compile features - -- Detecting CXX compile features - done - -- Looking for pthread.h - -- Looking for pthread.h - found - -- Looking for pthread_create - -- Looking for pthread_create - not found - -- Looking for pthread_create in pthreads - -- Looking for pthread_create in pthreads - not found - -- Looking for pthread_create in pthread - -- Looking for pthread_create in pthread - found - -- Found Threads: TRUE - -- Found torch: /libtorch/lib/libtorch.so - -- Configuring done - -- Generating done - -- Build files have been written to: /warp_perspective/example_app/build - $ make -j - Scanning dependencies of target warp_perspective - [ 25%] Building CXX object warp_perspective/CMakeFiles/warp_perspective.dir/op.cpp.o - [ 50%] Linking CXX shared library libwarp_perspective.so - [ 50%] Built target warp_perspective - Scanning dependencies of target example_app - [ 75%] Building CXX object CMakeFiles/example_app.dir/main.cpp.o - [100%] Linking CXX executable example_app - [100%] Built target example_app - -If we now run the ``example_app`` binary and hand it our serialized model, we -should arrive at a happy ending: - -.. code-block:: shell - - $ ./example_app example.pt - 11.4125 5.8262 9.5345 8.6111 12.3997 - 7.4683 13.5969 9.0850 11.0698 9.4008 - 7.4597 15.0926 12.5727 8.9319 9.0666 - 9.4834 11.1747 9.0162 10.9521 8.6269 - 10.0000 10.0000 10.0000 10.0000 10.0000 - 10.0000 10.0000 10.0000 10.0000 10.0000 - 10.0000 10.0000 10.0000 10.0000 10.0000 - 10.0000 10.0000 10.0000 10.0000 10.0000 - [ Variable[CPUFloatType]{8,5} ] - -Success! You are now ready to inference away. - -Conclusion ----------- - -This tutorial walked you throw how to implement a custom TorchScript operator in -C++, how to build it into a shared library, how to use it in Python to define -TorchScript models and lastly how to load it into a C++ application for -inference workloads. You are now ready to extend your TorchScript models with -C++ operators that interface with third party C++ libraries, write custom high -performance CUDA kernels, or implement any other use case that requires the -lines between Python, TorchScript and C++ to blend smoothly. - -As always, if you run into any problems or have questions, you can use our -`forum `_ or `GitHub issues -`_ to get in touch. Also, our -`frequently asked questions (FAQ) page -`_ may have helpful information. 
- -Appendix A: More Ways of Building Custom Operators --------------------------------------------------- - -The section "Building the Custom Operator" explained how to build a custom -operator into a shared library using CMake. This appendix outlines two further -approaches for compilation. Both of them use Python as the "driver" or -"interface" to the compilation process. Also, both re-use the `existing -infrastructure `_ PyTorch -provides for `*C++ extensions* -`_, which are the -vanilla (eager) PyTorch equivalent of TorchScript custom operators that rely on -`pybind11 `_ for "explicit" binding of -functions from C++ into Python. - -The first approach uses C++ extensions' `convenient just-in-time (JIT) -compilation interface -`_ -to compile your code in the background of your PyTorch script the first time you -run it. The second approach relies on the venerable ``setuptools`` package and -involves writing a separate ``setup.py`` file. This allows more advanced -configuration as well as integration with other ``setuptools``-based projects. -We will explore both approaches in detail below. - -Building with JIT compilation -***************************** - -The JIT compilation feature provided by the PyTorch C++ extension toolkit allows -embedding the compilation of your custom operator directly into your Python -code, e.g. at the top of your training script. - -.. note:: - - "JIT compilation" here has nothing to do with the JIT compilation taking place - in the TorchScript compiler to optimize your program. It simply means that - your custom operator C++ code will be compiled in a folder under your system's - `/tmp` directory the first time you import it, as if you had compiled it - yourself beforehand. - -This JIT compilation feature comes in two flavors. In the first, you still keep -your operator implementation in a separate file (``op.cpp``), and then use -``torch.utils.cpp_extension.load()`` to compile your extension. Usually, this -function will return the Python module exposing your C++ extension. However, -since we are not compiling our custom operator into its own Python module, we -only want to compile a plain shared library . Fortunately, -``torch.utils.cpp_extension.load()`` has an argument ``is_python_module`` which -we can set to ``False`` to indicate that we are only interested in building a -shared library and not a Python module. ``torch.utils.cpp_extension.load()`` -will then compile and also load the shared library into the current process, -just like ``torch.ops.load_library`` did before: - -.. code-block:: python - - import torch.utils.cpp_extension - - torch.utils.cpp_extension.load( - name="warp_perspective", - sources=["op.cpp"], - extra_ldflags=["-lopencv_core", "-lopencv_imgproc"], - is_python_module=False, - verbose=True - ) - - print(torch.ops.my_ops.warp_perspective) - -This should approximately print: - -.. code-block:: python - - - -The second flavor of JIT compilation allows you to pass the source code for your -custom TorchScript operator as a string. For this, use -``torch.utils.cpp_extension.load_inline``: - -.. 
code-block:: python - - import torch - import torch.utils.cpp_extension - - op_source = """ - #include - #include - - torch::Tensor warp_perspective(torch::Tensor image, torch::Tensor warp) { - cv::Mat image_mat(/*rows=*/image.size(0), - /*cols=*/image.size(1), - /*type=*/CV_32FC1, - /*data=*/image.data()); - cv::Mat warp_mat(/*rows=*/warp.size(0), - /*cols=*/warp.size(1), - /*type=*/CV_32FC1, - /*data=*/warp.data()); - - cv::Mat output_mat; - cv::warpPerspective(image_mat, output_mat, warp_mat, /*dsize=*/{64, 64}); - - torch::Tensor output = - torch::from_blob(output_mat.ptr(), /*sizes=*/{64, 64}); - return output.clone(); - } - - TORCH_LIBRARY(my_ops, m) { - m.def("warp_perspective", &warp_perspective); - } - """ - - torch.utils.cpp_extension.load_inline( - name="warp_perspective", - cpp_sources=op_source, - extra_ldflags=["-lopencv_core", "-lopencv_imgproc"], - is_python_module=False, - verbose=True, - ) - - print(torch.ops.my_ops.warp_perspective) - -Naturally, it is best practice to only use -``torch.utils.cpp_extension.load_inline`` if your source code is reasonably -short. - -Note that if you're using this in a Jupyter Notebook, you should not execute -the cell with the registration multiple times because each execution registers -a new library and re-registers the custom operator. If you need to re-execute it, -please restart the Python kernel of your notebook beforehand. - -Building with Setuptools -************************ - -The second approach to building our custom operator exclusively from Python is -to use ``setuptools``. This has the advantage that ``setuptools`` has a quite -powerful and extensive interface for building Python modules written in C++. -However, since ``setuptools`` is really intended for building Python modules and -not plain shared libraries (which do not have the necessary entry points Python -expects from a module), this route can be slightly quirky. That said, all you -need is a ``setup.py`` file in place of the ``CMakeLists.txt`` which looks like -this: - -.. code-block:: python - - from setuptools import setup - from torch.utils.cpp_extension import BuildExtension, CppExtension - - setup( - name="warp_perspective", - ext_modules=[ - CppExtension( - "warp_perspective", - ["example_app/warp_perspective/op.cpp"], - libraries=["opencv_core", "opencv_imgproc"], - ) - ], - cmdclass={"build_ext": BuildExtension.with_options(no_python_abi_suffix=True)}, - ) - - -Notice that we enabled the ``no_python_abi_suffix`` option in the -``BuildExtension`` at the bottom. This instructs ``setuptools`` to omit any -Python-3 specific ABI suffixes in the name of the produced shared library. -Otherwise, on Python 3.7 for example, the library may be called -``warp_perspective.cpython-37m-x86_64-linux-gnu.so`` where -``cpython-37m-x86_64-linux-gnu`` is the ABI tag, but we really just want it to -be called ``warp_perspective.so`` - -If we now run ``python setup.py build develop`` in a terminal from within the -folder in which ``setup.py`` is situated, we should see something like: - -.. 
code-block:: shell - - $ python setup.py build develop - running build - running build_ext - building 'warp_perspective' extension - creating build - creating build/temp.linux-x86_64-3.7 - gcc -pthread -B /root/local/miniconda/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/root/local/miniconda/lib/python3.7/site-packages/torch/lib/include -I/root/local/miniconda/lib/python3.7/site-packages/torch/lib/include/torch/csrc/api/include -I/root/local/miniconda/lib/python3.7/site-packages/torch/lib/include/TH -I/root/local/miniconda/lib/python3.7/site-packages/torch/lib/include/THC -I/root/local/miniconda/include/python3.7m -c op.cpp -o build/temp.linux-x86_64-3.7/op.o -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=warp_perspective -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 - cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid for C/ObjC but not for C++ - creating build/lib.linux-x86_64-3.7 - g++ -pthread -shared -B /root/local/miniconda/compiler_compat -L/root/local/miniconda/lib -Wl,-rpath=/root/local/miniconda/lib -Wl,--no-as-needed -Wl,--sysroot=/ build/temp.linux-x86_64-3.7/op.o -lopencv_core -lopencv_imgproc -o build/lib.linux-x86_64-3.7/warp_perspective.so - running develop - running egg_info - creating warp_perspective.egg-info - writing warp_perspective.egg-info/PKG-INFO - writing dependency_links to warp_perspective.egg-info/dependency_links.txt - writing top-level names to warp_perspective.egg-info/top_level.txt - writing manifest file 'warp_perspective.egg-info/SOURCES.txt' - reading manifest file 'warp_perspective.egg-info/SOURCES.txt' - writing manifest file 'warp_perspective.egg-info/SOURCES.txt' - running build_ext - copying build/lib.linux-x86_64-3.7/warp_perspective.so -> - Creating /root/local/miniconda/lib/python3.7/site-packages/warp-perspective.egg-link (link to .) - Adding warp-perspective 0.0.0 to easy-install.pth file - - Installed /warp_perspective - Processing dependencies for warp-perspective==0.0.0 - Finished processing dependencies for warp-perspective==0.0.0 - -This will produce a shared library called ``warp_perspective.so``, which we can -pass to ``torch.ops.load_library`` as we did earlier to make our operator -visible to TorchScript: - -.. code-block:: python - - >>> import torch - >>> torch.ops.load_library("warp_perspective.so") - >>> print(torch.ops.my_ops.warp_perspective) - + TorchScript is deprecated, please use + `torch.export`_ instead. 
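For readers migrating away from this tutorial, a minimal, illustrative sketch of the suggested replacement follows. This example is not part of the patched file: the module ``M``, the input shapes, and the file name ``m.pt2`` are placeholders, and ``torch.export`` requires a recent PyTorch release (2.1 or later).

.. code-block:: python

    import torch
    from torch.export import export, save, load

    class M(torch.nn.Module):
        def forward(self, x):
            return torch.relu(x) + 1.0

    # Capture the module into an ExportedProgram using example inputs.
    ep = export(M(), (torch.randn(4, 8),))

    # The exported program can be saved, reloaded, and called like a module.
    save(ep, "m.pt2")
    ep2 = load("m.pt2")
    print(ep2.module()(torch.randn(4, 8)))

The ``torch.export`` documentation covers dynamic shapes and serialization in more detail.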
\ No newline at end of file diff --git a/advanced_source/torch_script_custom_ops/CMakeLists.txt b/advanced_source/torch_script_custom_ops/CMakeLists.txt deleted file mode 100644 index e116153b941..00000000000 --- a/advanced_source/torch_script_custom_ops/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -project(warp_perspective) - -find_package(Torch REQUIRED) -find_package(OpenCV REQUIRED) - -# Define our library target -add_library(warp_perspective SHARED op.cpp) -# Enable C++14 -target_compile_features(warp_perspective PRIVATE cxx_std_14) -# Link against LibTorch -target_link_libraries(warp_perspective "${TORCH_LIBRARIES}") -# Link against OpenCV -target_link_libraries(warp_perspective opencv_core opencv_imgproc) diff --git a/advanced_source/torch_script_custom_ops/op.cpp b/advanced_source/torch_script_custom_ops/op.cpp deleted file mode 100644 index ff2eb049c4c..00000000000 --- a/advanced_source/torch_script_custom_ops/op.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include -#include - -// BEGIN warp_perspective -torch::Tensor warp_perspective(torch::Tensor image, torch::Tensor warp) { - // BEGIN image_mat - cv::Mat image_mat(/*rows=*/image.size(0), - /*cols=*/image.size(1), - /*type=*/CV_32FC1, - /*data=*/image.data_ptr()); - // END image_mat - - // BEGIN warp_mat - cv::Mat warp_mat(/*rows=*/warp.size(0), - /*cols=*/warp.size(1), - /*type=*/CV_32FC1, - /*data=*/warp.data_ptr()); - // END warp_mat - - // BEGIN output_mat - cv::Mat output_mat; - cv::warpPerspective(image_mat, output_mat, warp_mat, /*dsize=*/{8, 8}); - // END output_mat - - // BEGIN output_tensor - torch::Tensor output = torch::from_blob(output_mat.ptr(), /*sizes=*/{8, 8}); - return output.clone(); - // END output_tensor -} -// END warp_perspective - -// BEGIN registry -TORCH_LIBRARY(my_ops, m) { - m.def("warp_perspective", warp_perspective); -} -// END registry diff --git a/advanced_source/torch_script_custom_ops/smoke_test.py b/advanced_source/torch_script_custom_ops/smoke_test.py deleted file mode 100644 index fa629ddcafb..00000000000 --- a/advanced_source/torch_script_custom_ops/smoke_test.py +++ /dev/null @@ -1,3 +0,0 @@ -import torch -torch.ops.load_library("build/libwarp_perspective.so") -print(torch.ops.my_ops.warp_perspective) diff --git a/advanced_source/torch_script_custom_ops/test.py b/advanced_source/torch_script_custom_ops/test.py deleted file mode 100644 index 26f96ef4599..00000000000 --- a/advanced_source/torch_script_custom_ops/test.py +++ /dev/null @@ -1,34 +0,0 @@ -import torch - - -print("BEGIN preamble") -torch.ops.load_library("build/libwarp_perspective.so") -print(torch.ops.my_ops.warp_perspective(torch.randn(32, 32), torch.rand(3, 3))) -print("END preamble") - - -# BEGIN compute -def compute(x, y, z): - return x.matmul(y) + torch.relu(z) -# END compute - - -print("BEGIN trace") -inputs = [torch.randn(4, 8), torch.randn(8, 5), torch.randn(4, 5)] -trace = torch.jit.trace(compute, inputs) -print(trace.graph) -print("END trace") - - -# BEGIN compute2 -def compute(x, y, z): - x = torch.ops.my_ops.warp_perspective(x, torch.eye(3)) - return x.matmul(y) + torch.relu(z) -# END compute2 - - -print("BEGIN trace2") -inputs = [torch.randn(4, 8), torch.randn(8, 5), torch.randn(8, 5)] -trace = torch.jit.trace(compute, inputs) -print(trace.graph) -print("END trace2") diff --git a/beginner_source/Intro_to_TorchScript_tutorial.py b/beginner_source/Intro_to_TorchScript_tutorial.py deleted file mode 100644 index 54799229342..00000000000 --- 
a/beginner_source/Intro_to_TorchScript_tutorial.py +++ /dev/null @@ -1,400 +0,0 @@ -""" -Introduction to TorchScript -=========================== - -**Authors:** James Reed (jamesreed@fb.com), Michael Suo (suo@fb.com), rev2 - -.. warning:: TorchScript is no longer in active development. - -This tutorial is an introduction to TorchScript, an intermediate -representation of a PyTorch model (subclass of ``nn.Module``) that -can then be run in a high-performance environment such as C++. - -In this tutorial we will cover: - -1. The basics of model authoring in PyTorch, including: - -- Modules -- Defining ``forward`` functions -- Composing modules into a hierarchy of modules - -2. Specific methods for converting PyTorch modules to TorchScript, our - high-performance deployment runtime - -- Tracing an existing module -- Using scripting to directly compile a module -- How to compose both approaches -- Saving and loading TorchScript modules - -We hope that after you complete this tutorial, you will proceed to go through -`the follow-on tutorial `_ -which will walk you through an example of actually calling a TorchScript -model from C++. - -""" - -import torch # This is all you need to use both PyTorch and TorchScript! -print(torch.__version__) -torch.manual_seed(191009) # set the seed for reproducibility - - -###################################################################### -# Basics of PyTorch Model Authoring -# --------------------------------- -# -# Let’s start out by defining a simple ``Module``. A ``Module`` is the -# basic unit of composition in PyTorch. It contains: -# -# 1. A constructor, which prepares the module for invocation -# 2. A set of ``Parameters`` and sub-\ ``Modules``. These are initialized -# by the constructor and can be used by the module during invocation. -# 3. A ``forward`` function. This is the code that is run when the module -# is invoked. -# -# Let’s examine a small example: -# - -class MyCell(torch.nn.Module): - def __init__(self): - super(MyCell, self).__init__() - - def forward(self, x, h): - new_h = torch.tanh(x + h) - return new_h, new_h - -my_cell = MyCell() -x = torch.rand(3, 4) -h = torch.rand(3, 4) -print(my_cell(x, h)) - - -###################################################################### -# So we’ve: -# -# 1. Created a class that subclasses ``torch.nn.Module``. -# 2. Defined a constructor. The constructor doesn’t do much, just calls -# the constructor for ``super``. -# 3. Defined a ``forward`` function, which takes two inputs and returns -# two outputs. The actual contents of the ``forward`` function are not -# really important, but it’s sort of a fake `RNN -# cell `__–that -# is–it’s a function that is applied on a loop. -# -# We instantiated the module, and made ``x`` and ``h``, which are just 3x4 -# matrices of random values. Then we invoked the cell with -# ``my_cell(x, h)``. This in turn calls our ``forward`` function. -# -# Let’s do something a little more interesting: -# - -class MyCell(torch.nn.Module): - def __init__(self): - super(MyCell, self).__init__() - self.linear = torch.nn.Linear(4, 4) - - def forward(self, x, h): - new_h = torch.tanh(self.linear(x) + h) - return new_h, new_h - -my_cell = MyCell() -print(my_cell) -print(my_cell(x, h)) - - -###################################################################### -# We’ve redefined our module ``MyCell``, but this time we’ve added a -# ``self.linear`` attribute, and we invoke ``self.linear`` in the forward -# function. -# -# What exactly is happening here? 
``torch.nn.Linear`` is a ``Module`` from -# the PyTorch standard library. Just like ``MyCell``, it can be invoked -# using the call syntax. We are building a hierarchy of ``Module``\ s. -# -# ``print`` on a ``Module`` will give a visual representation of the -# ``Module``\ ’s subclass hierarchy. In our example, we can see our -# ``Linear`` subclass and its parameters. -# -# By composing ``Module``\ s in this way, we can succinctly and readably -# author models with reusable components. -# -# You may have noticed ``grad_fn`` on the outputs. This is a detail of -# PyTorch’s method of automatic differentiation, called -# `autograd `__. -# In short, this system allows us to compute derivatives through -# potentially complex programs. The design allows for a massive amount of -# flexibility in model authoring. -# -# Now let’s examine said flexibility: -# - -class MyDecisionGate(torch.nn.Module): - def forward(self, x): - if x.sum() > 0: - return x - else: - return -x - -class MyCell(torch.nn.Module): - def __init__(self): - super(MyCell, self).__init__() - self.dg = MyDecisionGate() - self.linear = torch.nn.Linear(4, 4) - - def forward(self, x, h): - new_h = torch.tanh(self.dg(self.linear(x)) + h) - return new_h, new_h - -my_cell = MyCell() -print(my_cell) -print(my_cell(x, h)) - - -###################################################################### -# We’ve once again redefined our ``MyCell`` class, but here we’ve defined -# ``MyDecisionGate``. This module utilizes **control flow**. Control flow -# consists of things like loops and ``if``-statements. -# -# Many frameworks take the approach of computing symbolic derivatives -# given a full program representation. However, in PyTorch, we use a -# gradient tape. We record operations as they occur, and replay them -# backwards in computing derivatives. In this way, the framework does not -# have to explicitly define derivatives for all constructs in the -# language. -# -# .. figure:: https://github.com/pytorch/pytorch/raw/main/docs/source/_static/img/dynamic_graph.gif -# :alt: How autograd works -# -# How autograd works -# - - -###################################################################### -# Basics of TorchScript -# --------------------- -# -# Now let’s take our running example and see how we can apply TorchScript. -# -# In short, TorchScript provides tools to capture the definition of your -# model, even in light of the flexible and dynamic nature of PyTorch. -# Let’s begin by examining what we call **tracing**. -# -# Tracing ``Modules`` -# ~~~~~~~~~~~~~~~~~~~ -# - -class MyCell(torch.nn.Module): - def __init__(self): - super(MyCell, self).__init__() - self.linear = torch.nn.Linear(4, 4) - - def forward(self, x, h): - new_h = torch.tanh(self.linear(x) + h) - return new_h, new_h - -my_cell = MyCell() -x, h = torch.rand(3, 4), torch.rand(3, 4) -traced_cell = torch.jit.trace(my_cell, (x, h)) -print(traced_cell) -traced_cell(x, h) - - -###################################################################### -# We’ve rewinded a bit and taken the second version of our ``MyCell`` -# class. As before, we’ve instantiated it, but this time, we’ve called -# ``torch.jit.trace``, passed in the ``Module``, and passed in *example -# inputs* the network might see. -# -# What exactly has this done? 
It has invoked the ``Module``, recorded the -# operations that occurred when the ``Module`` was run, and created an -# instance of ``torch.jit.ScriptModule`` (of which ``TracedModule`` is an -# instance) -# -# TorchScript records its definitions in an Intermediate Representation -# (or IR), commonly referred to in Deep learning as a *graph*. We can -# examine the graph with the ``.graph`` property: -# - -print(traced_cell.graph) - - -###################################################################### -# However, this is a very low-level representation and most of the -# information contained in the graph is not useful for end users. Instead, -# we can use the ``.code`` property to give a Python-syntax interpretation -# of the code: -# - -print(traced_cell.code) - - -###################################################################### -# So **why** did we do all this? There are several reasons: -# -# 1. TorchScript code can be invoked in its own interpreter, which is -# basically a restricted Python interpreter. This interpreter does not -# acquire the Global Interpreter Lock, and so many requests can be -# processed on the same instance simultaneously. -# 2. This format allows us to save the whole model to disk and load it -# into another environment, such as in a server written in a language -# other than Python -# 3. TorchScript gives us a representation in which we can do compiler -# optimizations on the code to provide more efficient execution -# 4. TorchScript allows us to interface with many backend/device runtimes -# that require a broader view of the program than individual operators. -# -# We can see that invoking ``traced_cell`` produces the same results as -# the Python module: -# - -print(my_cell(x, h)) -print(traced_cell(x, h)) - - -###################################################################### -# Using Scripting to Convert Modules -# ---------------------------------- -# -# There’s a reason we used version two of our module, and not the one with -# the control-flow-laden submodule. Let’s examine that now: -# - -class MyDecisionGate(torch.nn.Module): - def forward(self, x): - if x.sum() > 0: - return x - else: - return -x - -class MyCell(torch.nn.Module): - def __init__(self, dg): - super(MyCell, self).__init__() - self.dg = dg - self.linear = torch.nn.Linear(4, 4) - - def forward(self, x, h): - new_h = torch.tanh(self.dg(self.linear(x)) + h) - return new_h, new_h - -my_cell = MyCell(MyDecisionGate()) -traced_cell = torch.jit.trace(my_cell, (x, h)) - -print(traced_cell.dg.code) -print(traced_cell.code) - - -###################################################################### -# Looking at the ``.code`` output, we can see that the ``if-else`` branch -# is nowhere to be found! Why? Tracing does exactly what we said it would: -# run the code, record the operations *that happen* and construct a -# ``ScriptModule`` that does exactly that. Unfortunately, things like control -# flow are erased. -# -# How can we faithfully represent this module in TorchScript? We provide a -# **script compiler**, which does direct analysis of your Python source -# code to transform it into TorchScript. Let’s convert ``MyDecisionGate`` -# using the script compiler: -# - -scripted_gate = torch.jit.script(MyDecisionGate()) - -my_cell = MyCell(scripted_gate) -scripted_cell = torch.jit.script(my_cell) - -print(scripted_gate.code) -print(scripted_cell.code) - - -###################################################################### -# Hooray! 
We’ve now faithfully captured the behavior of our program in -# TorchScript. Let’s now try running the program: -# - -# New inputs -x, h = torch.rand(3, 4), torch.rand(3, 4) -print(scripted_cell(x, h)) - - -###################################################################### -# Mixing Scripting and Tracing -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# Some situations call for using tracing rather than scripting (e.g. a -# module has many architectural decisions that are made based on constant -# Python values that we would like to not appear in TorchScript). In this -# case, scripting can be composed with tracing: ``torch.jit.script`` will -# inline the code for a traced module, and tracing will inline the code -# for a scripted module. -# -# An example of the first case: -# - -class MyRNNLoop(torch.nn.Module): - def __init__(self): - super(MyRNNLoop, self).__init__() - self.cell = torch.jit.trace(MyCell(scripted_gate), (x, h)) - - def forward(self, xs): - h, y = torch.zeros(3, 4), torch.zeros(3, 4) - for i in range(xs.size(0)): - y, h = self.cell(xs[i], h) - return y, h - -rnn_loop = torch.jit.script(MyRNNLoop()) -print(rnn_loop.code) - - - -###################################################################### -# And an example of the second case: -# - -class WrapRNN(torch.nn.Module): - def __init__(self): - super(WrapRNN, self).__init__() - self.loop = torch.jit.script(MyRNNLoop()) - - def forward(self, xs): - y, h = self.loop(xs) - return torch.relu(y) - -traced = torch.jit.trace(WrapRNN(), (torch.rand(10, 3, 4))) -print(traced.code) - - -###################################################################### -# This way, scripting and tracing can be used when the situation calls for -# each of them and used together. -# -# Saving and Loading models -# ------------------------- -# -# We provide APIs to save and load TorchScript modules to/from disk in an -# archive format. This format includes code, parameters, attributes, and -# debug information, meaning that the archive is a freestanding -# representation of the model that can be loaded in an entirely separate -# process. Let’s save and load our wrapped RNN module: -# - -traced.save('wrapped_rnn.pt') - -loaded = torch.jit.load('wrapped_rnn.pt') - -print(loaded) -print(loaded.code) - - -###################################################################### -# As you can see, serialization preserves the module hierarchy and the -# code we’ve been examining throughout. The model can also be loaded, for -# example, `into -# C++ `__ for -# python-free execution. -# -# Further Reading -# ~~~~~~~~~~~~~~~ -# -# We’ve completed our tutorial! For a more involved demonstration, check -# out the NeurIPS demo for converting machine translation models using -# TorchScript: -# https://colab.research.google.com/drive/1HiICg6jRkBnr5hvK2-VnMi88Vi9pUzEJ -# diff --git a/prototype_source/README.txt b/prototype_source/README.txt index 67aab29bb47..cc5074ff67a 100644 --- a/prototype_source/README.txt +++ b/prototype_source/README.txt @@ -4,18 +4,14 @@ Prototype Tutorials Profiling PyTorch RPC-Based Workloads https://github.com/pytorch/tutorials/blob/main/prototype_source/distributed_rpc_profiling.rst -2. torchscript_freezing.py - Model Freezing in TorchScript - https://github.com/pytorch/tutorials/blob/main/prototype_source/torchscript_freezing.py - -3. vulkan_workflow.rst +2. vulkan_workflow.rst Vulkan Backend User Workflow https://pytorch.org/tutorials/prototype/vulkan_workflow.html -4. flight_recorder_tutorial.rst +3. 
flight_recorder_tutorial.rst Flight Recorder User Guide https://pytorch.org/tutorials/prototype/flight_recorder_tutorial.html -5. python_extension_autoload.rst +4. python_extension_autoload.rst Autoloading Out-of-Tree Extension https://pytorch.org/tutorials/prototype/python_extension_autoload.html diff --git a/prototype_source/torchscript_freezing.py b/prototype_source/torchscript_freezing.py deleted file mode 100644 index b5b467a247e..00000000000 --- a/prototype_source/torchscript_freezing.py +++ /dev/null @@ -1,136 +0,0 @@ -""" -Model Freezing in TorchScript -============================= - -.. warning:: TorchScript is no longer in active development. - -In this tutorial, we introduce the syntax for *model freezing* in TorchScript. -Freezing is the process of inlining Pytorch module parameters and attributes -values into the TorchScript internal representation. Parameter and attribute -values are treated as final values and they cannot be modified in the resulting -Frozen module. - -Basic Syntax ------------- -Model freezing can be invoked using API below: - - ``torch.jit.freeze(mod : ScriptModule, names : str[]) -> ScriptModule`` - -Note the input module can either be the result of scripting or tracing. -See https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html - -Next, we demonstrate how freezing works using an example: -""" - -import torch, time - -class Net(torch.nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv1 = torch.nn.Conv2d(1, 32, 3, 1) - self.conv2 = torch.nn.Conv2d(32, 64, 3, 1) - self.dropout1 = torch.nn.Dropout2d(0.25) - self.dropout2 = torch.nn.Dropout2d(0.5) - self.fc1 = torch.nn.Linear(9216, 128) - self.fc2 = torch.nn.Linear(128, 10) - - def forward(self, x): - x = self.conv1(x) - x = torch.nn.functional.relu(x) - x = self.conv2(x) - x = torch.nn.functional.max_pool2d(x, 2) - x = self.dropout1(x) - x = torch.flatten(x, 1) - x = self.fc1(x) - x = torch.nn.functional.relu(x) - x = self.dropout2(x) - x = self.fc2(x) - output = torch.nn.functional.log_softmax(x, dim=1) - return output - - @torch.jit.export - def version(self): - return 1.0 - -net = torch.jit.script(Net()) -fnet = torch.jit.freeze(net) - -print(net.conv1.weight.size()) -print(net.conv1.bias) - -try: - print(fnet.conv1.bias) - # without exception handling, prints: - # RuntimeError: __torch__.z.___torch_mangle_3.Net does not have a field - # with name 'conv1' -except RuntimeError: - print("field 'conv1' is inlined. It does not exist in 'fnet'") - -try: - fnet.version() - # without exception handling, prints: - # RuntimeError: __torch__.z.___torch_mangle_3.Net does not have a field - # with name 'version' -except RuntimeError: - print("method 'version' is not deleted in fnet. 
Only 'forward' is preserved") - -fnet2 = torch.jit.freeze(net, ["version"]) - -print(fnet2.version()) - -B=1 -warmup = 1 -iter = 1000 -input = torch.rand(B, 1,28, 28) - -start = time.time() -for i in range(warmup): - net(input) -end = time.time() -print("Scripted - Warm up time: {0:7.4f}".format(end-start), flush=True) - -start = time.time() -for i in range(warmup): - fnet(input) -end = time.time() -print("Frozen - Warm up time: {0:7.4f}".format(end-start), flush=True) - -start = time.time() -for i in range(iter): - input = torch.rand(B, 1,28, 28) - net(input) -end = time.time() -print("Scripted - Inference: {0:5.2f}".format(end-start), flush=True) - -start = time.time() -for i in range(iter): - input = torch.rand(B, 1,28, 28) - fnet2(input) -end = time.time() -print("Frozen - Inference time: {0:5.2f}".format(end-start), flush =True) - -############################################################### -# On my machine, I measured the time: -# -# * Scripted - Warm up time: 0.0107 -# * Frozen - Warm up time: 0.0048 -# * Scripted - Inference: 1.35 -# * Frozen - Inference time: 1.17 - -############################################################### -# In our example, warm up time measures the first two runs. The frozen model -# is 50% faster than the scripted model. On some more complex models, we -# observed even higher speed up of warm up time. freezing achieves this speed up -# because it is doing some the work TorchScript has to do when the first couple -# runs are initiated. -# -# Inference time measures inference execution time after the model is warmed up. -# Although we observed significant variation in execution time, the -# frozen model is often about 15% faster than the scripted model. When input is larger, -# we observe a smaller speed up because the execution is dominated by tensor operations. - -############################################################### -# Conclusion -# ----------- -# In this tutorial, we learned about model freezing. Freezing is a useful technique to -# optimize models for inference and it also can significantly reduce TorchScript warmup time. diff --git a/recipes_source/distributed_optim_torchscript.rst b/recipes_source/distributed_optim_torchscript.rst index 2e68f035170..3d2042bd6b7 100644 --- a/recipes_source/distributed_optim_torchscript.rst +++ b/recipes_source/distributed_optim_torchscript.rst @@ -1,213 +1,3 @@ -Distributed Optimizer with TorchScript support -============================================================== - -.. warning:: TorchScript is no longer in active development. - -In this recipe, you will learn: - -- The high-level idea of distributed optimizer with TorchScript support and what this feature brings -- How to write customized distributed optimizer that enables TorchScript support - - -Requirements ------------- - -- PyTorch 1.8+ -- `Getting Started With Distributed RPC Framework `_ - - -What is Distributed Optimizer? ------------------------------------- - -`DistributedOptimizer `_ takes a list of remote -parameters (RRef) and runs the optimizer locally on the workers where the parameters live, which is commonly used together -with Distributed RPC/Autograd to do model parallel training. It could use any of the local optimizer algorithms (either -pre-defined algorithms provided in ``torch.optim`` or custom defined ones) to apply the gradients on each worker. - - -What is Distributed Optimizer with TorchScript support? 
--------------------------------------------------------
-
-Distributed optimizers are widely used in distributed model-parallel training. In some
-common use cases, training needs to run in a multithreaded rather than a multiprocess
-manner for performance and resource-utilization reasons (or at least partially multithreaded,
-e.g. a parameter server hosting part of the model and parameters, with a new thread updating
-the parameters per request). PyTorch itself does not natively support multithreaded training
-because of Python's Global Interpreter Lock (GIL), but it can leverage
-`TorchScript <https://pytorch.org/docs/stable/jit.html>`_ to escape the GIL and run the
-model in a multithreaded way.
-
-For critical model training workloads, improving training performance is an
-important topic. Researchers often want to implement different optimization strategies
-on the graph representation (e.g. via operator fusion) or implement custom operator kernels
-in order to speed up training.
-
-A distributed optimizer with TorchScript support helps get rid of the GIL and thus improves
-PyTorch's training performance in multithreaded environments. It also unlocks the potential
-for further performance gains from the advanced compiler technologies that TorchScript
-offers (e.g. CPU/GPU fusion).
-
-
-How to write a customized distributed optimizer with TorchScript support?
--------------------------------------------------------------------------
-
-The code below shows how to write a customized distributed optimizer given an existing local
-optimizer implementation, which unlocks the TorchScript benefits, including GIL removal and
-performance-improvement opportunities.
-
-Suppose that you already have a local optimizer that you use during training.
-In this case we will use `quasi-hyperbolic momentum (QHM) `_
-as an example to show how to enable TorchScript support; note that the same approach applies
-to any custom optimizer that inherits from ``torch.optim.Optimizer``.
-
-First, we need to separate the computation from the state management in the optimizer
-implementation, so that we can extract the computation part into a free function, which is
-TorchScript friendly. This has two benefits: 1. The computation logic becomes easier to inspect,
-and it lets us quickly turn the parameter update/computation part into TorchScript and use the
-TorchScript IR for further optimizations (operator fusion, etc.). 2. The distributed optimizer
-internally uses a different mechanism to get gradients and update parameters (we store
-gradients separately instead of directly populating the ``param.grad`` field during the backward
-pass). Separating out the computation allows the distributed optimizer to run the optimizer
-update in multithreaded mode, as it eliminates the possible race condition on ``param.grad``.
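-As a quick aside (this sketch is not part of the original recipe, and ``sgd_update`` is an
-illustrative name), even plain SGD can be written in this style: a free function over plain
-tensors, with no optimizer object and no ``param.grad`` access, which makes it straightforward
-to compile with ``torch.jit.script``:
-
-::
-
-    import torch
-    from torch import Tensor
-    from typing import List
-
-    # Minimal sketch of the "computation as a free function" idea. The function
-    # mutates the parameters in-place and is meant to be called under
-    # ``torch.no_grad()``, just like the QHM update below.
-    @torch.jit.script
-    def sgd_update(params: List[Tensor],
-                   grads: List[Tensor],
-                   lr: float):
-        for i in range(len(params)):
-            params[i].add_(grads[i], alpha=-lr)
-
-The QHM update function used in this recipe, written as exactly such a free function, follows: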
- - -:: - - import torch - from torch import Tensor - from typing import List - - - def qhm_update(params: List[Tensor], - dp_list: List[Tensor], - momentum_buffer_list: List[Tensor], - lr: float, - nu: float, - weight_decay: float, - weight_decay_type: str, - momentum: float): - - for p, d_p, momentum_buffer in zip(params, dp_list, momentum_buffer_list): - if weight_decay != 0: - if weight_decay_type == "grad": - d_p.add_(weight_decay, p) - elif weight_decay_type == "direct": - p.mul_(1.0 - lr * weight_decay) - else: - raise ValueError("Invalid weight decay type provided") - - momentum_buffer.mul_(momentum).add_(1.0 - momentum, d_p) - - p.data.add_(-lr * nu, momentum_buffer) - p.data.add_(-lr * (1.0 - nu), d_p) - - - -Next we will define a distributed functional optimizer with TorchScript compatability to manage -the optimizer states and calls into the TorchScript compatible update function we defined above. -Note that a few conventions are different from normal custom optimizers: 1. We don't inherit -``torch.optim.Optimizer`` as TorchScript does not support polymorphism 2. ``step`` takes gradients -list instead of the loss closure. - -:: - - import torch - from torch import Tensor - from typing import List, Optional, Dict - - # define this as a TorchScript class - @torch.jit.script - class FunctionalQHM(object): - def __init__(self, - params: List[Tensor], - lr: float, - momentum: float, - nu: float, - weight_decay: float = 0.0, - weight_decay_type: str = "grad"): - if lr < 0.0: - raise ValueError("Invalid learning rate: {}".format(lr)) - if momentum < 0.0: - raise ValueError("Invalid momentum value: {}".format(momentum)) - if weight_decay < 0.0: - raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) - if weight_decay_type not in ("grad", "direct"): - raise ValueError("Invalid weight_decay_type value: {}".format(weight_decay_type)) - - self.defaults = { - "lr": lr, - "momentum": momentum, - "nu": nu, - "weight_decay": weight_decay, - } - self.weight_decay_type = weight_decay_type - - # NOTE: we only have one param_group here and don't allow user to add additional - # param group as it's not a common use case. - self.param_group = {"params": params} - - self.state = torch.jit.annotate(Dict[torch.Tensor, Dict[str, torch.Tensor]], {}) - - def step(self, gradients: List[Optional[Tensor]]): - params = self.param_group['params'] - params_with_grad = [] - grads = [] - momentum_buffer_list: List[Tensor] = [] - - if len(params) != len(gradients): - raise ValueError( - "the gradients passed in does not equal to the size of the parameters!" - + f"Params length: {len(params)}. " - + f"Gradients length: {len(gradients)}" - ) - - for param, gradient in zip(self.param_group['params'], gradients): - if gradient is not None: - params_with_grad.append(param) - grads.append(gradient) - state = self.state[param] - state['momentum_buffer'] = torch.zeros_like(param, memory_format=torch.preserve_format) - momentum_buffer_list.append(state['momentum_buffer']) - - # calls into the update function we just defined - with torch.no_grad(): - qhm_update(params_with_grad, - grads, - momentum_buffer_list, - self.defaults['lr'], - self.defaults['nu'], - self.defaults['weight_decay'], - self.weight_decay_type, - self.defaults['momentum']) - - - -Finally, we register our newly defined distributed functional optimizer into the ``functional_optim_map`` -This is so that the ``DistributedOptimizer`` will try to pick up our custom implementation instead of the -pre-defined default ones. 
- -:: - - from torch.distributed.optim import DistributedOptimizer - - DistributedOptimizer.functional_optim_map[QHM] = FunctionalQHM - -Now you can use the ``QHM`` optimizer as normal in distributed training by passing it to -`DistributedOptimizer `_ - - -:: - - ... - remote_params_list = [...] - dist_optim = DistributedOptimizer( - QHM, remote_params_list, *args, **kwargs - ) - -DistributedOptimizer will automatically transform the QHM optimizer into the ``FunctionalQHM`` under the hood, -and enable the TorchScript support. This will unlock the performance that boosted by multithreaded training -and also give more potentials for further improvements (i.e. TorchScript fusion, etc.) - -Note that majority of PyTorch built-in optimizers are already using this methodology to speed up distributed -training. If you see warning about some optimizers haven't been converted yet, you can write your own conversion -by following this recipe. +.. warning:: + TorchScript is deprecated, please use + `torch.export`_ instead. \ No newline at end of file diff --git a/recipes_source/torchscript_inference.rst b/recipes_source/torchscript_inference.rst index 8c78413edd1..3d2042bd6b7 100644 --- a/recipes_source/torchscript_inference.rst +++ b/recipes_source/torchscript_inference.rst @@ -1,199 +1,3 @@ -TorchScript for Deployment -========================== - -.. warning:: TorchScript is no longer in active development. - -In this recipe, you will learn: - -- What TorchScript is -- How to export your trained model in TorchScript format -- How to load your TorchScript model in C++ and do inference - -Requirements ------------- - -- PyTorch 1.5 -- TorchVision 0.6.0 -- libtorch 1.5 -- C++ compiler - -The instructions for installing the three PyTorch components are -available at `pytorch.org`_. The C++ compiler will depend on your -platform. - -What is TorchScript? --------------------- - -**TorchScript** is an intermediate representation of a PyTorch model -(subclass of ``nn.Module``) that can then be run in a high-performance -environment like C++. It’s a high-performance subset of Python that is -meant to be consumed by the **PyTorch JIT Compiler,** which performs -run-time optimization on your model’s computation. TorchScript is the -recommended model format for doing scaled inference with PyTorch models. -For more information, see the PyTorch `Introduction to TorchScript -tutorial`_, the `Loading A TorchScript Model in C++ tutorial`_, and the -`full TorchScript documentation`_, all of which are available on -`pytorch.org`_. - -How to Export Your Model ------------------------- - -As an example, let’s take a pretrained vision model. All of the -pretrained models in TorchVision are compatible with TorchScript. - -Run the following Python 3 code, either in a script or from the REPL: - -.. code:: python3 - - import torch - import torch.nn.functional as F - import torchvision.models as models - - r18 = models.resnet18(pretrained=True) # We now have an instance of the pretrained model - r18_scripted = torch.jit.script(r18) # *** This is the TorchScript export - dummy_input = torch.rand(1, 3, 224, 224) # We should run a quick test - -Let’s do a sanity check on the equivalence of the two models: - -:: - - unscripted_output = r18(dummy_input) # Get the unscripted model's prediction... 
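-   # (Aside, not part of the original recipe: for a deployment-style comparison you could
-   # first call r18.eval() and r18_scripted.eval() and wrap both calls in torch.no_grad(),
-   # mirroring what the C++ example further below does with module.eval() and torch::NoGradGuard.)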
- scripted_output = r18_scripted(dummy_input) # ...and do the same for the scripted version - - unscripted_top5 = F.softmax(unscripted_output, dim=1).topk(5).indices - scripted_top5 = F.softmax(scripted_output, dim=1).topk(5).indices - - print('Python model top 5 results:\n {}'.format(unscripted_top5)) - print('TorchScript model top 5 results:\n {}'.format(scripted_top5)) - -You should see that both versions of the model give the same results: - -:: - - Python model top 5 results: - tensor([[463, 600, 731, 899, 898]]) - TorchScript model top 5 results: - tensor([[463, 600, 731, 899, 898]]) - -With that check confirmed, go ahead and save the model: - -:: - - r18_scripted.save('r18_scripted.pt') - -Loading TorchScript Models in C++ ---------------------------------- - -Create the following C++ file and name it ``ts-infer.cpp``: - -.. code:: cpp - - #include - #include - - - int main(int argc, const char* argv[]) { - if (argc != 2) { - std::cerr << "usage: ts-infer \n"; - return -1; - } - - std::cout << "Loading model...\n"; - - // deserialize ScriptModule - torch::jit::script::Module module; - try { - module = torch::jit::load(argv[1]); - } catch (const c10::Error& e) { - std::cerr << "Error loading model\n"; - std::cerr << e.msg_without_backtrace(); - return -1; - } - - std::cout << "Model loaded successfully\n"; - - torch::NoGradGuard no_grad; // ensures that autograd is off - module.eval(); // turn off dropout and other training-time layers/functions - - // create an input "image" - std::vector inputs; - inputs.push_back(torch::rand({1, 3, 224, 224})); - - // execute model and package output as tensor - at::Tensor output = module.forward(inputs).toTensor(); - - namespace F = torch::nn::functional; - at::Tensor output_sm = F::softmax(output, F::SoftmaxFuncOptions(1)); - std::tuple top5_tensor = output_sm.topk(5); - at::Tensor top5 = std::get<1>(top5_tensor); - - std::cout << top5[0] << "\n"; - - std::cout << "\nDONE\n"; - return 0; - } - -This program: - -- Loads the model you specify on the command line -- Creates a dummy “image” input tensor -- Performs inference on the input - -Also, notice that there is no dependency on TorchVision in this code. -The saved version of your TorchScript model has your learning weights -*and* your computation graph - nothing else is needed. - -Building and Running Your C++ Inference Engine ----------------------------------------------- - -Create the following ``CMakeLists.txt`` file: - -:: - - cmake_minimum_required(VERSION 3.0 FATAL_ERROR) - project(custom_ops) - - find_package(Torch REQUIRED) - - add_executable(ts-infer ts-infer.cpp) - target_link_libraries(ts-infer "${TORCH_LIBRARIES}") - set_property(TARGET ts-infer PROPERTY CXX_STANDARD 11) - -Make the program: - -:: - - cmake -DCMAKE_PREFIX_PATH= - make - -Now, we can run inference in C++, and verify that we get a result: - -:: - - $ ./ts-infer r18_scripted.pt - Loading model... - Model loaded successfully - 418 - 845 - 111 - 892 - 644 - [ CPULongType{5} ] - - DONE - -Important Resources -------------------- - -- `pytorch.org`_ for installation instructions, and more documentation - and tutorials. -- `Introduction to TorchScript tutorial`_ for a deeper initial - exposition of TorchScript -- `Full TorchScript documentation`_ for complete TorchScript language - and API reference - -.. _pytorch.org: https://pytorch.org/ -.. _Introduction to TorchScript tutorial: https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html -.. 
_Full TorchScript documentation: https://pytorch.org/docs/stable/jit.html
-.. _Loading A TorchScript Model in C++ tutorial: https://pytorch.org/tutorials/advanced/cpp_export.html
-.. _full TorchScript documentation: https://pytorch.org/docs/stable/jit.html
+.. warning::
+   TorchScript is deprecated. Please use
+   `torch.export <https://pytorch.org/docs/stable/export.html>`__ instead.
\ No newline at end of file