From a21d80c6b4c9e27e5472fed837bb6308639b0492 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Sat, 3 Feb 2024 19:22:15 +0800 Subject: [PATCH 1/3] mgr: stop using deprecated API to initialize Python MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Py_SetProgramName() is deprecated since CPython 3.11, see https://docs.python.org/3/c-api/init_config.html . `Py_InitializeFromConfig()` and friends were introduced by CPython 3.8, but we still need to support CPython 3.6 which is shipped by CentOS8. so we have to be backward compatible with the older Python versions. so let's use new machinary to initialize the Python interpretor if the tree is compiled with CPython 3.8 and up, i.e., PY_VERSION_HEX >= 0x03080000. so that this piece of code can be tested on ubuntu:jammy, which ships Python 3.10, see https://packages.ubuntu.com/jammy/amd64/python3 this change addresses following compiling warning: ``` [428/753] Building CXX object src/mgr/CMakeFiles/ceph-mgr.dir/PyModuleRegistry.cc.o /var/ssd/ceph/src/mgr/PyModuleRegistry.cc: In member function ‘void PyModuleRegistry::init()’: /var/ssd/ceph/src/mgr/PyModuleRegistry.cc:49:20: warning: ‘void Py_SetProgramName(const wchar_t*)’ is deprecated [-Wdeprecated-declarations] 49 | Py_SetProgramName(const_cast(WCHAR(MGR_PYTHON_EXECUTABLE))); | ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from /usr/include/python3.12/Python.h:94, from /var/ssd/ceph/src/mgr/PyModule.h:22, from /var/ssd/ceph/src/mgr/PyModuleRegistry.h:18, from /var/ssd/ceph/src/mgr/PyModuleRegistry.cc:14: /usr/include/python3.12/pylifecycle.h:37:38: note: declared here 37 | Py_DEPRECATED(3.11) PyAPI_FUNC(void) Py_SetProgramName(const wchar_t *); | ^~~~~~~~~~~~~~~~~` ``` Signed-off-by: Kefu Chai --- src/mgr/PyModuleRegistry.cc | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index eb2d2babe75fa..1fd92b9028290 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -14,6 +14,7 @@ #include "PyModuleRegistry.h" #include +#include #include "include/stringify.h" #include "common/errno.h" @@ -46,14 +47,47 @@ void PyModuleRegistry::init() // Set up global python interpreter #define WCHAR(s) L ## #s +#if PY_VERSION_HEX >= 0x03080000 + PyConfig py_config; + // do not enable isolated mode, otherwise we would not be able to have access + // to the site packages. since we cannot import any module before initializing + // the interpreter, we would not be able to use "site" module for retrieving + // the path to site packager. we import "site" module for retrieving + // sitepackages in Python < 3.8 though, this does not apply to the + // initialization with PyConfig. + PyConfig_InitPythonConfig(&py_config); + BOOST_SCOPE_EXIT_ALL(&py_config) { + PyConfig_Clear(&py_config); + }; +#if PY_VERSION_HEX >= 0x030b0000 + py_config.safe_path = 0; +#endif + py_config.parse_argv = 0; + py_config.configure_c_stdio = 0; + py_config.install_signal_handlers = 0; + py_config.pathconfig_warnings = 0; + + PyStatus status; + status = PyConfig_SetString(&py_config, &py_config.program_name, WCHAR(MGR_PYTHON_EXECUTABLE)); + ceph_assertf(!PyStatus_Exception(status), "PyConfig_SetString: %s:%s", status.func, status.err_msg); + // Add more modules + if (g_conf().get_val("daemonize")) { + PyImport_AppendInittab("ceph_logger", PyModule::init_ceph_logger); + } + PyImport_AppendInittab("ceph_module", PyModule::init_ceph_module); + status = Py_InitializeFromConfig(&py_config); + ceph_assertf(!PyStatus_Exception(status), "Py_InitializeFromConfig: %s:%s", status.func, status.err_msg); +#else Py_SetProgramName(const_cast(WCHAR(MGR_PYTHON_EXECUTABLE))); -#undef WCHAR // Add more modules if (g_conf().get_val("daemonize")) { PyImport_AppendInittab("ceph_logger", PyModule::init_ceph_logger); } PyImport_AppendInittab("ceph_module", PyModule::init_ceph_module); Py_InitializeEx(0); +#endif // PY_VERSION_HEX >= 0x03080000 +#undef WCHAR + #if PY_VERSION_HEX < 0x03090000 // Let CPython know that we will be calling it back from other // threads in future. From e417c1c935a35dbed838f1e664729647817a47e2 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Sat, 3 Feb 2024 19:49:13 +0800 Subject: [PATCH 2/3] mgr: set argv for python in PyModuleRegistry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit before this change, we setup the progname for Python interpreter, but setup the argv for it in PyModule. and we are using deprecated API to initialize Python interpreter. in this change, let's do this in a single place for better maintainability. also, take this opportunity, to use the non-deprecated API to initialize interpreter on Python >= 3.8. this silence the warning when compiling ceph-mgr with CPython 3.12: ``` /var/ssd/ceph/src/mgr/PyModule.cc: In member function ‘int PyModule::load(PyThreadState*)’: /var/ssd/ceph/src/mgr/PyModule.cc:363:20: warning: ‘void PySys_SetArgv(int, wchar_t**)’ is deprecated [-Wdeprecated-declarations] 363 | PySys_SetArgv(1, (wchar_t**)argv); | ~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~ In file included from /usr/include/python3.12/Python.h:96, from /var/ssd/ceph/src/mgr/BaseMgrModule.h:4, from /var/ssd/ceph/src/mgr/PyModule.cc:14: /usr/include/python3.12/sysmodule.h:13:38: note: declared here 13 | Py_DEPRECATED(3.11) PyAPI_FUNC(void) PySys_SetArgv(int, wchar_t **); | ^~~~~~~~~~~~~ ``` Signed-off-by: Kefu Chai --- src/mgr/PyModule.cc | 4 ---- src/mgr/PyModuleRegistry.cc | 7 +++++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mgr/PyModule.cc b/src/mgr/PyModule.cc index 084cf3ffc1eac..e27c3693f3c85 100644 --- a/src/mgr/PyModule.cc +++ b/src/mgr/PyModule.cc @@ -357,10 +357,6 @@ int PyModule::load(PyThreadState *pMainThreadState) return -EINVAL; } else { pMyThreadState.set(thread_state); - // Some python modules do not cope with an unpopulated argv, so lets - // fake one. This step also picks up site-packages into sys.path. - const wchar_t *argv[] = {L"ceph-mgr"}; - PySys_SetArgv(1, (wchar_t**)argv); // Configure sys.path to include mgr_module_path string paths = (g_conf().get_val("mgr_module_path") + ':' + get_site_packages() + ':'); diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index 1fd92b9028290..2370da21e7073 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -70,6 +70,11 @@ void PyModuleRegistry::init() PyStatus status; status = PyConfig_SetString(&py_config, &py_config.program_name, WCHAR(MGR_PYTHON_EXECUTABLE)); ceph_assertf(!PyStatus_Exception(status), "PyConfig_SetString: %s:%s", status.func, status.err_msg); + // Some python modules do not cope with an unpopulated argv, so lets + // fake one. This step also picks up site-packages into sys.path. + const wchar_t* argv[] = {L"ceph-mgr"}; + status = PyConfig_SetArgv(&py_config, 1, (wchar_t *const *)argv); + ceph_assertf(!PyStatus_Exception(status), "PyConfig_SetArgv: %s:%s", status.func, status.err_msg); // Add more modules if (g_conf().get_val("daemonize")) { PyImport_AppendInittab("ceph_logger", PyModule::init_ceph_logger); @@ -85,6 +90,8 @@ void PyModuleRegistry::init() } PyImport_AppendInittab("ceph_module", PyModule::init_ceph_module); Py_InitializeEx(0); + const wchar_t *argv[] = {L"ceph-mgr"}; + PySys_SetArgv(1, (wchar_t**)argv); #endif // PY_VERSION_HEX >= 0x03080000 #undef WCHAR From 45832deb6ccdc4886a27cf506bb46e9edacd3299 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Sat, 3 Feb 2024 21:09:27 +0800 Subject: [PATCH 3/3] mgr: add site package paths in PyModuleRegistry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit before this change, we add the paths of site packages to sys.path when starting subinterpretors for each of the mgr modules. this works just fine. but in Python 3.11, it deprecates `PySys_SetPath()` in favor of PyConfig machinary, which sets the module search paths in PyConfig, before calling `Py_InitializeFromConfig()`. so, to set the module search paths with the new machinary, we need to do this in `PyModuleRegistry`, where we initialize the global Python interpretor using the new PyConfig machinary. and since we've switched to the new PyConfig machinary when compiling with Python 3.8 and up. in this change, to unify the implementation of pre and post Python 3.8, we set the module search paths in PyModuleRegistry. because PyConfig imports the site packages by default, and we are allowed to append a new path to the existing search paths, we just append the configured `mgr_module_path` when compiling with Python 3.8 and up. and when it comes to lower versions of Python, the existing behavior is preserved. this change should silence the compiling warning like: ``` /var/ssd/ceph/src/mgr/PyModule.cc:368:20: warning: ‘void PySys_SetPath(const wchar_t*)’ is deprecated [-Wdeprecated-declarations] 368 | PySys_SetPath(const_cast(sys_path.c_str())); | ~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/include/python3.12/sysmodule.h:15:38: note: declared here 15 | Py_DEPRECATED(3.11) PyAPI_FUNC(void) PySys_SetPath(const wchar_t *); | ^~~~~~~~~~~~~ ``` Signed-off-by: Kefu Chai --- src/mgr/PyModule.cc | 74 ------------------------------- src/mgr/PyModule.h | 1 - src/mgr/PyModuleRegistry.cc | 86 +++++++++++++++++++++++++++++++++++++ src/mgr/PyModuleRegistry.h | 1 + 4 files changed, 87 insertions(+), 75 deletions(-) diff --git a/src/mgr/PyModule.cc b/src/mgr/PyModule.cc index e27c3693f3c85..02a40526f7b8c 100644 --- a/src/mgr/PyModule.cc +++ b/src/mgr/PyModule.cc @@ -47,7 +47,6 @@ std::string PyModule::mgr_store_prefix = "mgr/"; using std::string; -using std::wstring; // decode a Python exception into a string std::string handle_pyerror( @@ -231,72 +230,6 @@ std::pair PyModuleConfig::set_config( } } -std::string PyModule::get_site_packages() -{ - std::stringstream site_packages; - - // CPython doesn't auto-add site-packages dirs to sys.path for us, - // but it does provide a module that we can ask for them. - auto site_module = PyImport_ImportModule("site"); - ceph_assert(site_module); - - auto site_packages_fn = PyObject_GetAttrString(site_module, "getsitepackages"); - if (site_packages_fn != nullptr) { - auto site_packages_list = PyObject_CallObject(site_packages_fn, nullptr); - ceph_assert(site_packages_list); - - auto n = PyList_Size(site_packages_list); - for (Py_ssize_t i = 0; i < n; ++i) { - if (i != 0) { - site_packages << ":"; - } - site_packages << PyUnicode_AsUTF8(PyList_GetItem(site_packages_list, i)); - } - - Py_DECREF(site_packages_list); - Py_DECREF(site_packages_fn); - } else { - // Fall back to generating our own site-packages paths by imitating - // what the standard site.py does. This is annoying but it lets us - // run inside virtualenvs :-/ - - auto site_packages_fn = PyObject_GetAttrString(site_module, "addsitepackages"); - ceph_assert(site_packages_fn); - - auto known_paths = PySet_New(nullptr); - auto pArgs = PyTuple_Pack(1, known_paths); - PyObject_CallObject(site_packages_fn, pArgs); - Py_DECREF(pArgs); - Py_DECREF(known_paths); - Py_DECREF(site_packages_fn); - - auto sys_module = PyImport_ImportModule("sys"); - ceph_assert(sys_module); - auto sys_path = PyObject_GetAttrString(sys_module, "path"); - ceph_assert(sys_path); - - dout(1) << "sys.path:" << dendl; - auto n = PyList_Size(sys_path); - bool first = true; - for (Py_ssize_t i = 0; i < n; ++i) { - dout(1) << " " << PyUnicode_AsUTF8(PyList_GetItem(sys_path, i)) << dendl; - if (first) { - first = false; - } else { - site_packages << ":"; - } - site_packages << PyUnicode_AsUTF8(PyList_GetItem(sys_path, i)); - } - - Py_DECREF(sys_path); - Py_DECREF(sys_module); - } - - Py_DECREF(site_module); - - return site_packages.str(); -} - PyObject* PyModule::init_ceph_logger() { auto py_logger = PyModule_Create(&ceph_logger_module); @@ -357,13 +290,6 @@ int PyModule::load(PyThreadState *pMainThreadState) return -EINVAL; } else { pMyThreadState.set(thread_state); - // Configure sys.path to include mgr_module_path - string paths = (g_conf().get_val("mgr_module_path") + ':' + - get_site_packages() + ':'); - wstring sys_path(wstring(begin(paths), end(paths)) + Py_GetPath()); - PySys_SetPath(const_cast(sys_path.c_str())); - dout(10) << "Computed sys.path '" - << string(begin(sys_path), end(sys_path)) << "'" << dendl; } } // Environment is all good, import the external module diff --git a/src/mgr/PyModule.h b/src/mgr/PyModule.h index 8d88ff94c6271..177447c2cb323 100644 --- a/src/mgr/PyModule.h +++ b/src/mgr/PyModule.h @@ -51,7 +51,6 @@ class PyModule mutable ceph::mutex lock = ceph::make_mutex("PyModule::lock"); private: const std::string module_name; - std::string get_site_packages(); int load_subclass_of(const char* class_name, PyObject** py_class); // Did the MgrMap identify this module as one that should run? diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index 2370da21e7073..5adce55bf0435 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -66,6 +66,9 @@ void PyModuleRegistry::init() py_config.configure_c_stdio = 0; py_config.install_signal_handlers = 0; py_config.pathconfig_warnings = 0; + // site_import is 1 by default, but set it explicitly as we do import + // site packages manually for Python < 3.8 + py_config.site_import = 1; PyStatus status; status = PyConfig_SetString(&py_config, &py_config.program_name, WCHAR(MGR_PYTHON_EXECUTABLE)); @@ -80,6 +83,15 @@ void PyModuleRegistry::init() PyImport_AppendInittab("ceph_logger", PyModule::init_ceph_logger); } PyImport_AppendInittab("ceph_module", PyModule::init_ceph_module); + // Configure sys.path to include mgr_module_path + auto pythonpath_env = g_conf().get_val("mgr_module_path"); + if (const char* pythonpath = getenv("PYTHONPATH")) { + pythonpath_env += ":"; + pythonpath_env += pythonpath; + } + status = PyConfig_SetBytesString(&py_config, &py_config.pythonpath_env, pythonpath_env.data()); + ceph_assertf(!PyStatus_Exception(status), "PyConfig_SetBytesString: %s:%s", status.func, status.err_msg); + dout(10) << "set PYTHONPATH to " << std::quoted(pythonpath_env) << dendl; status = Py_InitializeFromConfig(&py_config); ceph_assertf(!PyStatus_Exception(status), "Py_InitializeFromConfig: %s:%s", status.func, status.err_msg); #else @@ -92,6 +104,14 @@ void PyModuleRegistry::init() Py_InitializeEx(0); const wchar_t *argv[] = {L"ceph-mgr"}; PySys_SetArgv(1, (wchar_t**)argv); + + std::string paths = (g_conf().get_val("mgr_module_path") + ':' + + get_site_packages() + ':'); + std::wstring sys_path(begin(paths), end(paths)); + sys_path += Py_GetPath(); + PySys_SetPath(const_cast(sys_path.c_str())); + dout(10) << "Computed sys.path '" + << std::string(begin(sys_path), end(sys_path)) << "'" << dendl; #endif // PY_VERSION_HEX >= 0x03080000 #undef WCHAR @@ -258,6 +278,72 @@ void PyModuleRegistry::active_start( } } +std::string PyModuleRegistry::get_site_packages() +{ + std::stringstream site_packages; + + // CPython doesn't auto-add site-packages dirs to sys.path for us, + // but it does provide a module that we can ask for them. + auto site_module = PyImport_ImportModule("site"); + ceph_assert(site_module); + + auto site_packages_fn = PyObject_GetAttrString(site_module, "getsitepackages"); + if (site_packages_fn != nullptr) { + auto site_packages_list = PyObject_CallObject(site_packages_fn, nullptr); + ceph_assert(site_packages_list); + + auto n = PyList_Size(site_packages_list); + for (Py_ssize_t i = 0; i < n; ++i) { + if (i != 0) { + site_packages << ":"; + } + site_packages << PyUnicode_AsUTF8(PyList_GetItem(site_packages_list, i)); + } + + Py_DECREF(site_packages_list); + Py_DECREF(site_packages_fn); + } else { + // Fall back to generating our own site-packages paths by imitating + // what the standard site.py does. This is annoying but it lets us + // run inside virtualenvs :-/ + + auto site_packages_fn = PyObject_GetAttrString(site_module, "addsitepackages"); + ceph_assert(site_packages_fn); + + auto known_paths = PySet_New(nullptr); + auto pArgs = PyTuple_Pack(1, known_paths); + PyObject_CallObject(site_packages_fn, pArgs); + Py_DECREF(pArgs); + Py_DECREF(known_paths); + Py_DECREF(site_packages_fn); + + auto sys_module = PyImport_ImportModule("sys"); + ceph_assert(sys_module); + auto sys_path = PyObject_GetAttrString(sys_module, "path"); + ceph_assert(sys_path); + + dout(1) << "sys.path:" << dendl; + auto n = PyList_Size(sys_path); + bool first = true; + for (Py_ssize_t i = 0; i < n; ++i) { + dout(1) << " " << PyUnicode_AsUTF8(PyList_GetItem(sys_path, i)) << dendl; + if (first) { + first = false; + } else { + site_packages << ":"; + } + site_packages << PyUnicode_AsUTF8(PyList_GetItem(sys_path, i)); + } + + Py_DECREF(sys_path); + Py_DECREF(sys_module); + } + + Py_DECREF(site_module); + + return site_packages.str(); +} + std::vector PyModuleRegistry::probe_modules(const std::string &path) const { const auto opt = g_conf().get_val("mgr_disabled_modules"); diff --git a/src/mgr/PyModuleRegistry.h b/src/mgr/PyModuleRegistry.h index 9d6d9c2cdd021..da5bb596c938d 100644 --- a/src/mgr/PyModuleRegistry.h +++ b/src/mgr/PyModuleRegistry.h @@ -55,6 +55,7 @@ class PyModuleRegistry // before ClusterState exists. MgrMap mgr_map; + static std::string get_site_packages(); /** * Discover python modules from local disk */