From 3070b7c08374fc2b350648c16cfa91d4d7ea61f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:33:57 +0200 Subject: [PATCH] Explain how to add an extension module (#1350) * explain how to add modules * Update extension-modules.rst * Update extension-modules.rst * Update extension-modules.rst * Update extension-modules.rst * Update extension-modules.rst * Update extension-modules.rst * Update extension-modules.rst * Update extension-modules.rst * Update extension-modules.rst * Address Hugo's feedback * Update extension-modules.rst * Update extension-modules.rst * improvements - use distinct names for files to highlight differences in configurations - be more precise on the terminology of an extension module - explain how to build a required module (always built-in) vs an optional module (built-in or dynamic) - address Ezio's review * fixup! sphinx * fixup! indents * fixup! warnings * improve sections * fix markup * improve titles * improve presentation * fixup! markup * simplify snippets * improvements * improvements * some rewordings and cleanups * simplify wording * address Erlend's review * fix indents? * add ref to clinic everywhere when needed * fix typos * address encukou's review * improve the page flow * use sentence case (that's the reason why the previous title felt wrong to me!) * add podman tip * address rest of the review * address Alyssa's review * add details * address review - Add details on `Py_BUILD_CORE_*` macros - Add tips for `Py_LIMITED_API` * Make it easier to update the required ubuntu version * fixup! * fixup! * improve comment * use double quotes instead of single quotes * Address Carol's review. 
--- _extensions/ubuntu_version.py | 28 + conf.py | 8 + developer-workflow/extension-modules.rst | 672 ++++++++++++++++++++++- 3 files changed, 703 insertions(+), 5 deletions(-) create mode 100644 _extensions/ubuntu_version.py diff --git a/_extensions/ubuntu_version.py b/_extensions/ubuntu_version.py new file mode 100644 index 0000000000..1298f35d98 --- /dev/null +++ b/_extensions/ubuntu_version.py @@ -0,0 +1,28 @@ +"""Sphinx extension to update the required Ubuntu version. + +The required Ubuntu version should be specified in conf.py by:: + + configure_ubuntu_version = "MAJOR.MINOR" # e.g., "22.04" + +The version must match the one used to regenerate the configure script in +https://github.com/python/cpython/blob/main/Tools/build/regen-configure.sh. +""" + +from sphinx.errors import ExtensionError + + +def replace_ubuntu_version(app, docname, source): + """Replace all occurrences of $CONFIGURE_UBUNTU_VERSION$. + + This is needed since RST replacement via ``|...|`` is not supported + in code-blocks directives. 
+ """ + if (ubuntu_version := app.config.configure_ubuntu_version) is None: + raise ExtensionError("configure_ubuntu_version is not set in conf.py") + source[0] = source[0].replace("$CONFIGURE_UBUNTU_VERSION$", ubuntu_version) + + +def setup(app): + app.add_config_value("configure_ubuntu_version", None, "env", types=(str,)) + app.connect("source-read", replace_ubuntu_version) + return {"parallel_read_safe": True, "parallel_write_safe": True} diff --git a/conf.py b/conf.py index fc46f32234..569245df00 100644 --- a/conf.py +++ b/conf.py @@ -1,6 +1,10 @@ +import sys import time +sys.path.insert(0, '_extensions') + extensions = [ + 'ubuntu_version', 'notfound.extension', 'sphinx.ext.extlinks', 'sphinx.ext.intersphinx', @@ -194,3 +198,7 @@ copybutton_prompt_text = "$ " # https://sphinx-copybutton.readthedocs.io/en/latest/use.html#honor-line-continuation-characters-when-copying-multline-snippets copybutton_line_continuation_character = "\\" + +# Must be synchronized with the Ubuntu image version in +# https://github.com/python/cpython/blob/main/Tools/build/regen-configure.sh +configure_ubuntu_version = "22.04" diff --git a/developer-workflow/extension-modules.rst b/developer-workflow/extension-modules.rst index 0384c2b382..db391357ab 100644 --- a/developer-workflow/extension-modules.rst +++ b/developer-workflow/extension-modules.rst @@ -5,13 +5,675 @@ Standard library extension modules ================================== -In this section, we could explain how to write a CPython extension with the C language, but the topic can take a complete book. - -For this reason, we prefer to give you some links where you can read a very good documentation. - -Read the following references: +In this section, we explain how to configure and compile the CPython project +with a C :term:`extension module`. 
We will not explain how to write a C +extension module and prefer to give you some links where you can read good +documentation: * https://docs.python.org/dev/c-api/ * https://docs.python.org/dev/extending/ * :pep:`399` * https://pythonextensionpatterns.readthedocs.io/en/latest/ + +Some modules in the standard library, such as :mod:`datetime` or :mod:`pickle`, +have identical implementations in C and Python; the C implementation, when +available, is expected to improve performance (such extension modules are +commonly referred to as *accelerator modules*). + +Other modules mainly implemented in Python may import a C helper extension +providing implementation details (for instance, the :mod:`csv` module uses +the internal :mod:`!_csv` module defined in :cpy-file:`Modules/_csv.c`). + +Classifying extension modules +============================= + +Extension modules can be classified into two categories: + +* A *built-in* extension module is a module built and shipped with + the Python interpreter. A built-in module is *statically* linked + into the interpreter, thereby lacking a :attr:`__file__` attribute. + + .. seealso:: :data:`sys.builtin_module_names` --- names of built-in modules. + + Built-in modules are built with the :c:macro:`!Py_BUILD_CORE_BUILTIN` + macro defined. + +* A *shared* (or *dynamic*) extension module is built as a shared library + (``.so`` or ``.dll`` file) and is *dynamically* linked into the interpreter. + + In particular, the module's :attr:`__file__` attribute contains the path + to the ``.so`` or ``.dll`` file. + + Shared modules are built with the :c:macro:`!Py_BUILD_CORE_MODULE` + macro defined. Using the :c:macro:`!Py_BUILD_CORE_BUILTIN` macro + instead causes an :exc:`ImportError` when importing the module. + +.. note:: + + Informally, built-in extension modules can be regarded as *required* + while shared extension modules are *optional* in the sense that they + might be supplied, overridden or disabled externally. 
+ + Usually, accelerator modules are built as *shared* extension modules, + especially if they already have a pure Python implementation. + +According to :pep:`399`, *new* extension modules MUST provide a working and +tested pure Python implementation, unless a special dispensation from +the :github:`Steering Council ` is given. + +Adding an extension module to CPython +===================================== + +Assume that the standard library contains a pure Python module :mod:`!foo` +with the following :func:`!foo.greet` function: + +.. code-block:: python + :caption: Lib/foo.py + + def greet(): + return "Hello World!" + +Instead of using the Python implementation of :func:`!foo.greet`, we want to +use its corresponding C extension implementation exposed in the :mod:`!_foo` +module. Ideally, we want to modify :cpy-file:`!Lib/foo.py` as follows: + +.. code-block:: python + :caption: Lib/foo.py + + try: + # use the C implementation if possible + from _foo import greet + except ImportError: + # fallback to the pure Python implementation + def greet(): + return "Hello World!" + +.. note:: + + Accelerator modules should *never* be imported directly. The convention is + to mark them as private implementation details with the underscore prefix + (namely, :mod:`!_foo` in this example). + +In order to incorporate the accelerator module, we need to determine: + +- where to update the CPython project tree with the extension module source code, +- which files to modify to configure and compile the CPython project, and +- which ``Makefile`` rules to invoke at the end. + +Updating the CPython project tree +--------------------------------- + +Usually, accelerator modules are added in the :cpy-file:`Modules` directory of +the CPython project. If more than one file is needed for the extension module, +it is more convenient to create a sub-directory in :cpy-file:`Modules`. 
+ +In the simplest example where the extension module consists of one file, it may +be placed in :cpy-file:`Modules` as ``Modules/_foomodule.c``. For a non-trivial +example of the extension module :mod:`!_foo`, we consider the following working +tree: + +- :ref:`Modules/_foo/_foomodule.c` --- the extension module implementation. +- :ref:`Modules/_foo/helper.h` --- the extension helpers declarations. +- :ref:`Modules/_foo/helper.c` --- the extension helpers implementations. + +By convention, the source file containing the extension module implementation +is called ``<NAME>module.c``, where ``<NAME>`` is the name of the module that +will be later imported (in our case :mod:`!_foo`). In addition, the directory +containing the implementation should also be named similarly. + +.. code-block:: c + :caption: Modules/_foo/helper.h + :name: Modules/_foo/helper.h + + #ifndef _FOO_HELPER_H + #define _FOO_HELPER_H + + #include "Python.h" + + typedef struct { + /* ... */ + } foomodule_state; + + static inline foomodule_state * + get_foomodule_state(PyObject *module) + { + void *state = PyModule_GetState(module); + assert(state != NULL); + return (foomodule_state *)state; + } + + /* Helper used in Modules/_foo/_foomodule.c + * but implemented in Modules/_foo/helper.c. + */ + extern PyObject * + _Py_greet_fast(void); + + #endif // _FOO_HELPER_H + +.. tip:: + + Functions or data that do not need to be shared across different C source + files should be declared ``static`` to avoid exporting their symbols from + ``libpython``. + + If symbols need to be exported, their names must start with ``Py`` or + ``_Py``. This can be verified by ``make smelly``. For more details, + please refer to the section on :ref:`Changing Python's C API <c-api>`. + +.. code-block:: c + :caption: Modules/_foo/helper.c + :name: Modules/_foo/helper.c + + #include "helper.h" + + PyObject *_Py_greet_fast(void) { + return PyUnicode_FromString("Hello World!"); + } + +..
code-block:: c + :caption: Modules/_foo/_foomodule.c + :name: Modules/_foo/_foomodule.c + + #include "helper.h" + #include "clinic/_foomodule.c.h" + + /* Functions for the extension module's state */ + static int + foomodule_exec(PyObject *module) + { + // imports, static attributes, exported classes, etc + return 0; + } + + static int + foomodule_traverse(PyObject *m, visitproc visit, void *arg) + { + foomodule_state *st = get_foomodule_state(m); + // call Py_VISIT() on the state attributes + return 0; + } + + static int + foomodule_clear(PyObject *m) + { + foomodule_state *st = get_foomodule_state(m); + // call Py_CLEAR() on the state attributes + return 0; + } + + static void + foomodule_free(void *m) { + (void)foomodule_clear((PyObject *)m); + } + + /* Implementation of publicly exported functions. */ + + /*[clinic input] + module foo + [clinic start generated code]*/ + /*[clinic end generated code: output=... input=...]*/ + + /*[clinic input] + foo.greet -> object + + [clinic start generated code]*/ + + static PyObject * + foo_greet_impl(PyObject *module) + /*[clinic end generated code: output=... 
input=...]*/ + { + return _Py_greet_fast(); + } + + /* Exported module's data */ + + static PyMethodDef foomodule_methods[] = { + // macro in 'clinic/_foomodule.c.h' after running 'make clinic' + FOO_GREET_METHODDEF + {NULL, NULL} + }; + + static struct PyModuleDef_Slot foomodule_slots[] = { + // 'foomodule_exec' may be NULL if the state is trivial + {Py_mod_exec, foomodule_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL}, + }; + + static struct PyModuleDef foomodule = { + PyModuleDef_HEAD_INIT, + .m_name = "_foo", + .m_doc = "some doc", // or NULL if not needed + .m_size = sizeof(foomodule_state), + .m_methods = foomodule_methods, + .m_slots = foomodule_slots, + .m_traverse = foomodule_traverse, // or NULL if the state is trivial + .m_clear = foomodule_clear, // or NULL if the state is trivial + .m_free = foomodule_free, // or NULL if the state is trivial + }; + + PyMODINIT_FUNC + PyInit__foo(void) + { + return PyModuleDef_Init(&foomodule); + } + +.. tip:: + + Recall that the ``PyInit_<NAME>`` function must be suffixed by the + module name ``<NAME>`` used in import statements (here ``_foo``), + and which usually coincides with :c:member:`PyModuleDef.m_name`. + + Other identifiers such as those used in :ref:`Argument Clinic <clinic>` + inputs do not have such naming requirements. + +Configuring the CPython project +------------------------------- + +Now that we have added our extension module to the CPython source tree, +we need to update some configuration files in order to compile the CPython +project on different platforms. + +Updating :cpy-file:`!Modules/Setup.{bootstrap,stdlib}.in` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Depending on whether the extension module is required to get a functioning +interpreter or not, we update :cpy-file:`Modules/Setup.bootstrap.in` or +:cpy-file:`Modules/Setup.stdlib.in`.
In the former case, the extension +module is necessarily built as a built-in extension module. + +.. tip:: + + For accelerator modules, :cpy-file:`Modules/Setup.stdlib.in` should be + preferred over :cpy-file:`Modules/Setup.bootstrap.in`. + +For built-in extension modules, update :cpy-file:`Modules/Setup.bootstrap.in` +by adding the following line after the ``*static*`` marker: + +.. code-block:: text + :caption: :cpy-file:`Modules/Setup.bootstrap.in` + :emphasize-lines: 3 + + *static* + ... + _foo _foo/_foomodule.c _foo/helper.c + ... + +The syntax is ``<NAME> <SOURCES>`` where ``<NAME>`` is the name of the +module used in :keyword:`import` statements and ``<SOURCES>`` is the list +of space-separated source files. + +For other extension modules, update :cpy-file:`Modules/Setup.stdlib.in` +by adding the following line after the ``*@MODULE_BUILDTYPE@*`` marker +but before the ``*shared*`` marker: + +.. code-block:: text + :caption: :cpy-file:`Modules/Setup.stdlib.in` + :emphasize-lines: 3 + + *@MODULE_BUILDTYPE@* + ... + @MODULE__FOO_TRUE@_foo _foo/_foomodule.c _foo/helper.c + ... + *shared* + +The ``@MODULE_<NAME_UPPER>_TRUE@`` marker expects ``<NAME_UPPER>`` to +be the upper-cased form of ``<NAME>``, where ``<NAME>`` has the same meaning +as before (in our case, ``<NAME_UPPER>`` and ``<NAME>`` are ``_FOO`` and +``_foo`` respectively). The marker is followed by the list of source files. + +If the extension module must be built as a *shared* module, put the +``@MODULE__FOO_TRUE@_foo`` line after the ``*shared*`` marker: + +.. code-block:: text + :caption: :cpy-file:`Modules/Setup.stdlib.in` + :emphasize-lines: 4 + + ... + *shared* + ... + @MODULE__FOO_TRUE@_foo _foo/_foomodule.c _foo/helper.c + +Updating :cpy-file:`configure.ac` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. add section about configuration variable afterwards + +* Locate the ``SRCDIRS`` variable and add the following line: + + .. code-block:: text + :caption: :cpy-file:`configure.ac` + :emphasize-lines: 4 + + AC_SUBST([SRCDIRS]) + SRCDIRS="\ + ... + Modules/_foo \ + ..." + + ..
note:: + + This step is only needed when adding new source directories to + the CPython project. + +* Find the section containing ``PY_STDLIB_MOD`` and ``PY_STDLIB_MOD_SIMPLE`` + usages and add the following line: + + .. code-block:: text + :caption: :cpy-file:`configure.ac` + :emphasize-lines: 3 + + dnl always enabled extension modules + ... + PY_STDLIB_MOD_SIMPLE([_foo], [-I\$(srcdir)/Modules/_foo], []) + ... + + The ``PY_STDLIB_MOD_SIMPLE`` macro takes as arguments: + + * the module name ```` used in :keyword:`import` statements, + * the compiler flags (CFLAGS), and + * the linker flags (LDFLAGS). + + If the extension module may not be enabled or supported depending on the + host configuration, use the ``PY_STDLIB_MOD`` macro instead, which takes + as arguments: + + * the module name ```` used in :keyword:`import` statements, + * a boolean indicating whether the extension is **enabled** or not, + * a boolean indicating whether the extension is **supported** or not, + * the compiler flags (CFLAGS), and + * the linker flags (LDFLAGS). + + For instance, enabling the :mod:`!_foo` extension on Linux platforms, but + only providing support for 32-bit architecture, is achieved as follows: + + .. code-block:: text + :caption: :cpy-file:`configure.ac` + :emphasize-lines: 2, 3 + + PY_STDLIB_MOD([_foo], + [test "$ac_sys_system" = "Linux"], + [test "$ARCH_RUN_32BIT" = "true"], + [-I\$(srcdir)/Modules/_foo], []) + + More generally, the host's configuration status of the extension is + determined as follows: + + +-----------+-----------------+----------+ + | Enabled | Supported | Status | + +===========+=================+==========+ + | true | true | yes | + +-----------+-----------------+----------+ + | true | false | missing | + +-----------+-----------------+----------+ + | false | true or false | disabled | + +-----------+-----------------+----------+ + + The extension status is ``n/a`` if the extension is marked unavailable + by the ``PY_STDLIB_MOD_SET_NA`` macro. 
To mark an extension as unavailable, + find the usages of ``PY_STDLIB_MOD_SET_NA`` in :cpy-file:`configure.ac` and + add the following line: + + .. code-block:: text + :caption: :cpy-file:`configure.ac` + :emphasize-lines: 4 + + dnl Modules that are not available on some platforms + AS_CASE([$ac_sys_system], + ... + [PLATFORM_NAME], [PY_STDLIB_MOD_SET_NA([_foo])], + ... + ) + +.. tip:: + + Consider reading the comments and configurations for existing modules + in :cpy-file:`configure.ac` for guidance on adding new external build + dependencies for extension modules that need them. + +Updating :cpy-file:`Makefile.pre.in` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If needed, add the following line to the section for module dependencies: + +.. code-block:: text + :caption: :cpy-file:`Makefile.pre.in` + :emphasize-lines: 4 + + ########################################################################## + # Module dependencies and platform-specific files + ... + MODULE__FOO_DEPS=$(srcdir)/Modules/_foo/helper.h + ... + +The ``MODULE__DEPS`` variable follows the same naming +requirements as the ``@MODULE__TRUE@`` marker. + +Updating MSVC project files +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We describe the minimal steps for compiling on Windows using MSVC. + +* Update :cpy-file:`PC/config.c`: + + .. code-block:: c + :caption: :cpy-file:`PC/config.c` + :emphasize-lines: 3, 8 + + ... + // add the entry point prototype + extern PyObject* PyInit__foo(void); + ... + // update the entry points table + struct _inittab _PyImport_Inittab[] = { + ... + {"_foo", PyInit__foo}, + ... + {0, 0} + }; + ... + + Each item in ``_PyImport_Inittab`` consists of the module name to import, + here :mod:`!_foo`, with the corresponding ``PyInit_*`` function correctly + suffixed. + +* Update :cpy-file:`PCbuild/pythoncore.vcxproj`: + + .. code-block:: xml + :caption: :cpy-file:`PCbuild/pythoncore.vcxproj` + :emphasize-lines: 4, 11-12 + + + + ... + + ... + + + + + ... + + + ... 
+ + +* Update :cpy-file:`PCbuild/pythoncore.vcxproj.filters`: + + .. code-block:: xml + :caption: :cpy-file:`PCbuild/pythoncore.vcxproj.filters` + :emphasize-lines: 4-6, 13-18 + + + + ... + + Modules\_foo + + ... + + + + + ... + + Modules\_foo + + + Modules\_foo + + ... + + +.. tip:: + + Header files use ``<ClInclude>`` tags, whereas + source files use ``<ClCompile>`` tags. + + +Compiling the CPython project +----------------------------- + +Now that the configuration is in place, it remains to compile the project: + +.. code-block:: shell + + make regen-configure + ./configure + make regen-all + make regen-stdlib-module-names + make + +.. tip:: + + Use ``make -j`` to speed up compilation by utilizing as many CPU cores + as possible or ``make -jN`` to allow at most *N* concurrent jobs. + +* ``make regen-configure`` updates the :cpy-file:`configure` script. + +* ``make regen-all`` is responsible for regenerating header files and + invoking other scripts, such as :ref:`Argument Clinic <clinic>`. + Execute this rule if you do not know which files should be updated. + +* ``make regen-stdlib-module-names`` updates the standard module names, making + :mod:`!_foo` discoverable and importable via ``import _foo``. + +* The final ``make`` step is generally not needed since the previous ``make`` + invocations may completely rebuild the project, but it could be needed in + some specific cases. + +Troubleshooting +--------------- + +This section addresses common issues that you may face when following +this example of adding an extension module. + +No rule to make target ``regen-configure`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This usually happens after running ``make distclean`` (which removes +the ``Makefile``). The solution is to regenerate the :cpy-file:`configure` +script as follows: + +..
code-block:: shell + + ./configure # for creating the 'Makefile' file + make regen-configure # for updating the 'configure' script + ./configure # for updating the 'Makefile' file + +If missing, the :cpy-file:`configure` script can be regenerated +by executing :cpy-file:`Tools/build/regen-configure.sh`: + +.. code-block:: shell + + ./Tools/build/regen-configure.sh # create an up-to-date 'configure' + ./configure # create an up-to-date 'Makefile' + +``make regen-configure`` and missing permissions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Since this rule requires `Docker `_ to be +running, the following can be done on Linux platforms (``systemctl``-based): + +.. code-block:: shell + + systemctl status docker # is the Docker service running? + sudo systemctl start docker # start it if it is not + sudo systemctl restart docker # or restart it if the issue persists + +If Docker complains about missing permissions, this Stack Overflow post +could be useful in solving the issue: `How to fix docker: permission denied +`_. + +Once the Docker service is running, check that you have an `Ubuntu +$CONFIGURE_UBUNTU_VERSION$ image `_, +or pull it if that is not the case: + +.. code-block:: shell + + # check for the Docker image presence + docker images ubuntu:$CONFIGURE_UBUNTU_VERSION$ + # pull the Docker image if needed + docker image pull ubuntu:$CONFIGURE_UBUNTU_VERSION$ + +.. tip:: + + If the issue persists, you may try `podman `_. + The commands for listing or pulling an image are the same as ``docker``. + +Missing ``Py_BUILD_CORE`` define when using internal headers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, the CPython :ref:`Stable ABI ` is exposed via +:code:`#include "Python.h"`. In some cases, this may be insufficient +and internal headers from :cpy-file:`Include/internal` are needed; +in particular, those headers require the :c:macro:`!Py_BUILD_CORE` +macro to be defined.
+ +To that end, one should define the :c:macro:`!Py_BUILD_CORE_BUILTIN` +or the :c:macro:`!Py_BUILD_CORE_MODULE` macro depending on whether the +extension module is built-in or shared. Using either of the two macros +implies :c:macro:`!Py_BUILD_CORE` and gives access to CPython internals: + +.. code-block:: c + :caption: Definition of :c:macro:`!Py_BUILD_CORE_BUILTIN` + + #ifndef Py_BUILD_CORE_MODULE + # define Py_BUILD_CORE_BUILTIN 1 + #endif + +.. code-block:: c + :caption: Definition of :c:macro:`!Py_BUILD_CORE_MODULE` + + #ifndef Py_BUILD_CORE_BUILTIN + # define Py_BUILD_CORE_MODULE 1 + #endif + +Tips +---- + +In this section, we give some tips for improving the quality of +extension modules meant to be included in the standard library. + +Restricting to the Limited API +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order for non-CPython implementations to benefit from new extension modules, +it is recommended to use the :ref:`Limited API `. Instead of +exposing the entire Stable ABI, define the :c:macro:`Py_LIMITED_API` macro +*before* the :code:`#include "Python.h"` directive: + +.. code-block:: c + :caption: Using the 3.13 Limited API. + :emphasize-lines: 3, 6 + + #include "pyconfig.h" // Py_GIL_DISABLED + #ifndef Py_GIL_DISABLED + # define Py_LIMITED_API 0x030d0000 + #endif + + #include "Python.h" + +This makes the extension module non-CPython implementation-friendly by +removing the dependencies to CPython internals.