From ae9a111db0499515514fc025a29f553be0b93a6e Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Wed, 22 Feb 2023 19:59:49 -0800 Subject: [PATCH 1/8] Add support for the file-based result delivery to worker's CMSD config Added the corresponding configuration option to the entry-point CLI for injecting a value of the result folder into the config file of the worker CMSD. The configuration file has been extended as well to allow serving files from workers via the XROOTD file-based protocol. The SSI provider class has been extended to recognise result files at the local filesystems of workers as valid XROOTD resources. --- admin/local/docker/compose/docker-compose.yml | 24 +++++++++++++++++++ admin/tools/docker/mariadb/Dockerfile | 4 ++-- .../python/lsst/qserv/admin/cli/entrypoint.py | 4 ++++ .../python/lsst/qserv/admin/cli/options.py | 8 +++++++ .../python/lsst/qserv/admin/cli/script.py | 3 +++ .../templates/xrootd/etc/cmsd-worker.cf.jinja | 7 +++++- .../templates/xrootd/etc/xrdssi.cf.jinja | 4 ++++ src/xrdsvc/CMakeLists.txt | 2 ++ src/xrdsvc/SsiProvider.cc | 11 +++++++++ 9 files changed, 64 insertions(+), 3 deletions(-) diff --git a/admin/local/docker/compose/docker-compose.yml b/admin/local/docker/compose/docker-compose.yml index ab7cdd61b9..2d8fb7a898 100644 --- a/admin/local/docker/compose/docker-compose.yml +++ b/admin/local/docker/compose/docker-compose.yml @@ -42,11 +42,13 @@ x-worker-repl: volumes: worker_0_data: + worker_0_results: worker_0_repl_cfg: worker_0_xrootd: worker_0_home: worker_0_mariadb_lib: worker_1_data: + worker_1_results: worker_1_repl_cfg: worker_1_xrootd: worker_1_home: @@ -79,6 +81,9 @@ services: - type: volume source: worker_0_data target: /qserv/data + - type: volume + source: worker_0_results + target: /qserv/data/results - type: volume source: worker_0_mariadb_lib target: /var/lib/mysql @@ -90,11 +95,15 @@ services: --db-uri mysql://qsmaster@127.0.0.1:3306 --db-admin-uri mysql://root:CHANGEME@127.0.0.1:3306 --vnid-config 
"@/usr/local/lib64/libreplica.so {{db_uri}}/qservw_worker 0 0" + --results-dirname /qserv/data/results --cmsd-manager-name manager-xrootd --cmsd-manager-count 1 --mysql-monitor-password CHANGEME_MONITOR --log-cfg-file=/config-etc/log/log-worker-xrootd.cnf volumes: + - type: volume + source: worker_0_results + target: /qserv/data/results - type: volume source: worker_0_xrootd target: /var/run/xrootd @@ -113,10 +122,14 @@ services: entrypoint worker-cmsd --db-uri mysql://qsmaster@worker-db-0:3306 --vnid-config "@/usr/local/lib64/libreplica.so mysql://qsmaster@127.0.0.1:3306/qservw_worker 0 0" + --results-dirname /qserv/data/results --cmsd-manager-name manager-xrootd --cmsd-manager-count 1 network_mode: "service:worker-xrootd-0" volumes: + - type: volume + source: worker_0_results + target: /qserv/data/results - type: volume source: worker_0_xrootd target: /var/run/xrootd @@ -156,6 +169,9 @@ services: - type: volume source: worker_1_data target: /qserv/data + - type: volume + source: worker_1_results + target: /qserv/data/results - type: volume source: worker_1_mariadb_lib target: /var/lib/mysql @@ -170,12 +186,16 @@ services: --db-uri mysql://qsmaster@127.0.0.1:3306?socket={{db_socket}} --db-admin-uri mysql://root:CHANGEME@127.0.0.1:3306?socket={{db_socket}} --vnid-config "@/usr/local/lib64/libreplica.so mysql://qsmaster@127.0.0.1:3306/qservw_worker 0 0" + --results-dirname /qserv/data/results --cmsd-manager-name manager-xrootd --cmsd-manager-count 1 --mysql-monitor-password CHANGEME_MONITOR --targs db_socket=/qserv/mariadb/run/mysqld.sock --log-cfg-file=/config-etc/log/log-worker-xrootd.cnf volumes: + - type: volume + source: worker_1_results + target: /qserv/data/results - type: volume source: worker_1_xrootd target: /var/run/xrootd @@ -197,10 +217,14 @@ services: entrypoint --log-level DEBUG worker-cmsd --db-uri mysql://qsmaster@worker-db-1:3306?socket=/qserv/mariadb/run/mysqld.sock --vnid-config "@/usr/local/lib64/libreplica.so 
mysql://qsmaster@127.0.0.1:3306/qservw_worker 0 0" + --results-dirname /qserv/data/results --cmsd-manager-name manager-xrootd --cmsd-manager-count 1 network_mode: "service:worker-xrootd-1" volumes: + - type: volume + source: worker_1_results + target: /qserv/data/results - type: volume source: worker_1_xrootd target: /var/run/xrootd diff --git a/admin/tools/docker/mariadb/Dockerfile b/admin/tools/docker/mariadb/Dockerfile index 11caf50b6f..d70a60dbae 100644 --- a/admin/tools/docker/mariadb/Dockerfile +++ b/admin/tools/docker/mariadb/Dockerfile @@ -55,7 +55,7 @@ FROM mariadb-scisql AS lite-mariadb RUN useradd --uid 1000 qserv -RUN mkdir -p /qserv/data /config-etc /var/log/mysql \ - && chown qserv:qserv /qserv/data /config-etc /var/log/mysql +RUN mkdir -p /qserv/data /qserv/data/results /config-etc /var/log/mysql \ + && chown qserv:qserv /qserv/data /qserv/data/results /config-etc /var/log/mysql USER qserv diff --git a/src/admin/python/lsst/qserv/admin/cli/entrypoint.py b/src/admin/python/lsst/qserv/admin/cli/entrypoint.py index 4a1f3e055e..16c47e0d67 100644 --- a/src/admin/python/lsst/qserv/admin/cli/entrypoint.py +++ b/src/admin/python/lsst/qserv/admin/cli/entrypoint.py @@ -55,6 +55,7 @@ repl_admin_auth_key_option, repl_auth_key_option, repl_connection_option, + results_dirname_option, run_option, run_tests_option, targs_options, @@ -551,6 +552,7 @@ def xrootd_manager(ctx: click.Context, **kwargs: Any) -> None: @pass_context @db_uri_option(help=worker_db_help) @vnid_config_option(required=True) +@results_dirname_option() @cmsd_manager_name_option() @cmsd_manager_count_option() @debug_option() @@ -584,6 +586,7 @@ def worker_cmsd(ctx: click.Context, **kwargs: Any) -> None: @db_uri_option(help=worker_db_help) @db_admin_uri_option(help=admin_worker_db_help) @vnid_config_option(required=True) +@results_dirname_option() @cmsd_manager_name_option() @cmsd_manager_count_option() @mysql_monitor_password_option() @@ -605,6 +608,7 @@ def worker_xrootd(ctx: click.Context, 
**kwargs: Any) -> None: db_uri=targs["db_uri"], db_admin_uri=targs["db_admin_uri"], vnid_config=targs["vnid_config"], + results_dirname=targs["results_dirname"], mysql_monitor_password=targs["mysql_monitor_password"], db_qserv_user=targs["db_qserv_user"], cmsd_worker_cfg_file=targs["cmsd_worker_cfg_file"], diff --git a/src/admin/python/lsst/qserv/admin/cli/options.py b/src/admin/python/lsst/qserv/admin/cli/options.py index 36ff7909fa..0d3040cb03 100644 --- a/src/admin/python/lsst/qserv/admin/cli/options.py +++ b/src/admin/python/lsst/qserv/admin/cli/options.py @@ -146,6 +146,14 @@ def __call__(self, f: Callable) -> Callable: " source (static string, a file or worker database)." ) +results_dirname_option = partial( + click.option, + "--results-dirname", + help="Path to a folder where worker stores result sets of queries.", + default="/qserv/data/results", + show_default=True, +) + xrootd_manager_option = partial( click.option, diff --git a/src/admin/python/lsst/qserv/admin/cli/script.py b/src/admin/python/lsst/qserv/admin/cli/script.py index b906ad36ad..0eb9123744 100644 --- a/src/admin/python/lsst/qserv/admin/cli/script.py +++ b/src/admin/python/lsst/qserv/admin/cli/script.py @@ -412,6 +412,7 @@ def enter_worker_xrootd( db_uri: str, db_admin_uri: str, vnid_config: str, + results_dirname: str, mysql_monitor_password: str, db_qserv_user: str, cmsd_worker_cfg_file: str, @@ -436,6 +437,8 @@ def enter_worker_xrootd( vnid_config : str The config parameters used by the qserv cmsd to get the vnid from the specified source (static string, a file or worker database). + results_dirname : str + A path to a folder where query results will be stored. mysql_monitor_password : str The password used by applications that monitor via the worker database. 
db_qserv_user : str diff --git a/src/admin/templates/xrootd/etc/cmsd-worker.cf.jinja b/src/admin/templates/xrootd/etc/cmsd-worker.cf.jinja index 8d5c604c64..d20eed6afe 100644 --- a/src/admin/templates/xrootd/etc/cmsd-worker.cf.jinja +++ b/src/admin/templates/xrootd/etc/cmsd-worker.cf.jinja @@ -4,7 +4,7 @@ all.role server cms.vnid {{ vnid_config }} # Use XrdSsi plugin -xrootd.fslib -2 libXrdSsi.so +xrootd.fslib -2 libXrdSsi.so default ssi.svclib libxrdsvc.so oss.statlib -2 -arevents libXrdSsi.so @@ -12,6 +12,10 @@ oss.statlib -2 -arevents libXrdSsi.so # because of XrdSsi xrootd.async off +# Access to the files at the file system as specified by the "default" option +# in the statement "xrootd.fslib" above. +ssi.fspath {{ results_dirname }} + ssi.trace all debug ######################################## @@ -25,6 +29,7 @@ all.adminpath /var/run/xrootd # "nolock" directive prevents write-locking and is important for qserv # qserv is hardcoded for these paths. all.export / nolock +all.export {{ results_dirname }} # Specify that no significant free space is required on servers # Indeed current configuration doesn't expect to be dynamically diff --git a/src/admin/templates/xrootd/etc/xrdssi.cf.jinja b/src/admin/templates/xrootd/etc/xrdssi.cf.jinja index 22fcf201d3..461325c42c 100644 --- a/src/admin/templates/xrootd/etc/xrdssi.cf.jinja +++ b/src/admin/templates/xrootd/etc/xrdssi.cf.jinja @@ -83,3 +83,7 @@ maxtransmits = 50 # If more than this number of large transmits is happening at once, wait to # start more transmits until some are done. maxalreadytransmitting = 10 + +[results] +# The name of a folder where query results will be stored. 
+dirname = {{ results_dirname }} diff --git a/src/xrdsvc/CMakeLists.txt b/src/xrdsvc/CMakeLists.txt index 246920b804..db08d3c1e9 100644 --- a/src/xrdsvc/CMakeLists.txt +++ b/src/xrdsvc/CMakeLists.txt @@ -14,6 +14,8 @@ target_include_directories(qserv_xrdsvc PRIVATE ) target_link_libraries(qserv_xrdsvc PUBLIC + boost_filesystem + boost_system log XrdSsiLib ) diff --git a/src/xrdsvc/SsiProvider.cc b/src/xrdsvc/SsiProvider.cc index ae490fcac2..3206ff9a15 100644 --- a/src/xrdsvc/SsiProvider.cc +++ b/src/xrdsvc/SsiProvider.cc @@ -32,6 +32,7 @@ #include // Third party headers +#include "boost/filesystem.hpp" #include "XrdSsi/XrdSsiCluster.hh" #include "XrdSsi/XrdSsiLogger.hh" @@ -176,6 +177,16 @@ XrdSsiProvider::rStat SsiProviderServer::QueryResource(char const* rName, char c return isPresent; } + // Treat other resources as absolute path names of files + boost::filesystem::path const path(rName); + if (path.is_absolute()) { + boost::system::error_code ec; + if (boost::filesystem::exists(path, ec) && !ec.value()) { + LOGS(_log, LOG_LVL_DEBUG, "SsiProvider File Resource " << rName << " recognized"); + return isPresent; + } + } + LOGS(_log, LOG_LVL_DEBUG, "SsiProvider Query " << rName << " invalid"); return notPresent; } From 27ab504df87a99a3f4aaaaff3341a5bee5ee0a52 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Wed, 8 Mar 2023 07:30:52 +0000 Subject: [PATCH 2/8] Refactored and extended worker configuration service The service is now available to the client code via shared pointer that can be stored and used by classes as needed. The change reduces the number of parameters which are sent around the code. In the new version of the code only the shared pointer to the service is put to where the configuration parameters are consumed. Also added new parameters to support the file-based result delivery, including: a location of the results folder, the number of BOOST ASIO threads to run the QHTTP server at worker, and a selector for the desired results delivery protocol. 
--- .../templates/xrootd/etc/xrdssi.cf.jinja | 18 ++ src/wconfig/WorkerConfig.cc | 96 +++++++- src/wconfig/WorkerConfig.h | 230 +++++++++--------- src/wdb/testQueryRunner.cc | 6 +- src/wsched/testSchedulers.cc | 3 + src/xrdsvc/SsiProvider.cc | 8 +- src/xrdsvc/SsiService.cc | 65 ++--- src/xrdsvc/SsiService.h | 8 +- 8 files changed, 272 insertions(+), 162 deletions(-) diff --git a/src/admin/templates/xrootd/etc/xrdssi.cf.jinja b/src/admin/templates/xrootd/etc/xrdssi.cf.jinja index 461325c42c..59981c1c9f 100644 --- a/src/admin/templates/xrootd/etc/xrdssi.cf.jinja +++ b/src/admin/templates/xrootd/etc/xrdssi.cf.jinja @@ -85,5 +85,23 @@ maxtransmits = 50 maxalreadytransmitting = 10 [results] + # The name of a folder where query results will be stored. dirname = {{ results_dirname }} + +# The port number of the worker XROOTD service for serving files. +# NOTE: the hardcoded value may need to be replaced with a template +xrootd_port = 1094 + +# The number of the BOOST ASIO threads for HTTP requests +num_http_threads = 4 + +# Result delivery protocol. Allowed options: +# SSI - XROOTD/SSI stream (the default mode if no specific choice is provided) +# XROOT - XROOT file protocol +# HTTP - HTTP protocol +protocol = SSI + +# Set to any value but 0 if result files (if any) left after the previous run of +# the worker had to be deleted from the corresponding folder. 
+clean_up_on_start = 1 diff --git a/src/wconfig/WorkerConfig.cc b/src/wconfig/WorkerConfig.cc index 134c7d0585..3f8e9b914c 100644 --- a/src/wconfig/WorkerConfig.cc +++ b/src/wconfig/WorkerConfig.cc @@ -26,6 +26,10 @@ // System headers #include +#include + +// Third party headers +#include // LSST headers #include "lsst/log/Log.h" @@ -35,14 +39,99 @@ #include "util/ConfigStoreError.h" #include "wsched/BlendScheduler.h" +using namespace lsst::qserv::wconfig; + namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.wconfig.WorkerConfig"); +WorkerConfig::ResultDeliveryProtocol parseResultDeliveryProtocol(std::string const& str) { + // Using BOOST's 'iequals' for case-insensitive comparisons. + if (str.empty() || boost::iequals(str, "SSI")) { + return WorkerConfig::ResultDeliveryProtocol::SSI; + } else if (boost::iequals(str, "XROOT")) { + return WorkerConfig::ResultDeliveryProtocol::XROOT; + } else if (boost::iequals(str, "HTTP")) { + return WorkerConfig::ResultDeliveryProtocol::HTTP; + } + throw std::invalid_argument("WorkerConfig::" + std::string(__func__) + " unsupported method '" + str + + "'."); } +} // namespace namespace lsst::qserv::wconfig { +std::mutex WorkerConfig::_mtxOnInstance; + +std::shared_ptr WorkerConfig::_instance; + +std::shared_ptr WorkerConfig::create(std::string const& configFileName) { + std::lock_guard const lock(_mtxOnInstance); + if (_instance == nullptr) { + _instance = std::shared_ptr( + configFileName.empty() ? 
new WorkerConfig() + : new WorkerConfig(util::ConfigStore(configFileName))); + } + return _instance; +} + +std::shared_ptr WorkerConfig::instance() { + std::lock_guard const lock(_mtxOnInstance); + if (_instance == nullptr) { + throw std::logic_error("WorkerConfig::" + std::string(__func__) + ": instance has not been created."); + } + return _instance; +} + +std::string WorkerConfig::protocol2str(ResultDeliveryProtocol const& p) { + switch (p) { + case WorkerConfig::ResultDeliveryProtocol::SSI: + return "SSI"; + case WorkerConfig::ResultDeliveryProtocol::XROOT: + return "XROOT"; + case WorkerConfig::ResultDeliveryProtocol::HTTP: + return "HTTP"; + } + throw std::invalid_argument("WorkerConfig::" + std::string(__func__) + ": unknown protocol " + + std::to_string(static_cast(p))); +} + +WorkerConfig::WorkerConfig() + : _memManClass("MemManReal"), + _memManSizeMb(1000), + _memManLocation("/qserv/data/mysql"), + _threadPoolSize(wsched::BlendScheduler::getMinPoolSize()), + _maxPoolThreads(5000), + _maxGroupSize(1), + _requiredTasksCompleted(25), + _prioritySlow(2), + _prioritySnail(1), + _priorityMed(3), + _priorityFast(4), + _maxReserveSlow(2), + _maxReserveSnail(2), + _maxReserveMed(2), + _maxReserveFast(2), + _maxActiveChunksSlow(2), + _maxActiveChunksSnail(1), + _maxActiveChunksMed(4), + _maxActiveChunksFast(4), + _scanMaxMinutesFast(60), + _scanMaxMinutesMed(60 * 8), + _scanMaxMinutesSlow(60 * 12), + _scanMaxMinutesSnail(60 * 24), + _maxTasksBootedPerUserQuery(5), + _maxSqlConnections(800), + _ReservedInteractiveSqlConnections(50), + _bufferMaxTotalGB(41), + _maxTransmits(40), + _maxPerQid(3), + _resultsDirname("/qserv/data/results"), + _resultsXrootdPort(1094), + _resultsNumHttpThreads(1), + _resultDeliveryProtocol(ResultDeliveryProtocol::SSI), + _resultsCleanUpOnStart(true) {} + WorkerConfig::WorkerConfig(const util::ConfigStore& configStore) : _memManClass(configStore.get("memman.class", "MemManReal")), _memManSizeMb(configStore.getInt("memman.memory", 1000)), 
@@ -74,7 +163,12 @@ WorkerConfig::WorkerConfig(const util::ConfigStore& configStore) configStore.getInt("sqlconnections.reservedinteractivesqlconn", 50)), _bufferMaxTotalGB(configStore.getInt("transmit.buffermaxtotalgb", 41)), _maxTransmits(configStore.getInt("transmit.maxtransmits", 40)), - _maxPerQid(configStore.getInt("transmit.maxperqid", 3)) { + _maxPerQid(configStore.getInt("transmit.maxperqid", 3)), + _resultsDirname(configStore.get("results.dirname", "/qserv/data/results")), + _resultsXrootdPort(configStore.getInt("results.xrootd_port", 1094)), + _resultsNumHttpThreads(configStore.getInt("results.num_http_threads", 1)), + _resultDeliveryProtocol(::parseResultDeliveryProtocol(configStore.get("results.protocol", "SSI"))), + _resultsCleanUpOnStart(configStore.getInt("results.clean_up_on_start", 1) != 0) { int mysqlPort = configStore.getInt("mysql.port"); std::string mysqlSocket = configStore.get("mysql.socket"); if (mysqlPort == 0 && mysqlSocket.empty()) { diff --git a/src/wconfig/WorkerConfig.h b/src/wconfig/WorkerConfig.h index 39914a02e2..6746052bea 100644 --- a/src/wconfig/WorkerConfig.h +++ b/src/wconfig/WorkerConfig.h @@ -26,6 +26,8 @@ // System headers #include +#include +#include #include // Qserv headers @@ -35,201 +37,184 @@ namespace lsst::qserv::wconfig { /** - * Provide all configuration parameters for a Qserv worker instance - * - * Parse an INI configuration file, identify required parameters and ignore - * others, analyze and store them inside private member variables, use default - * values for missing parameters, provide accessor for each of these variable. - * This class hide configuration complexity - * from other part of the code. All private member variables are related to INI - * parameters and are immutables. + * Provide all configuration parameters for a Qserv worker instance. 
+ * Parse an INI configuration file, identify required parameters and ignore + * others, analyze and store them inside private member variables, use default + * values for missing parameters, provide an accessor for each of these variables. + * This class hides configuration complexity + * from other parts of the code. All private member variables are related to INI + * parameters and are immutable. * + * @note the class has a thread-safe API. */ class WorkerConfig { public: /** - * Create WorkerConfig instance from a INI configuration file - * - * @param configFileName: path to worker INI configuration file + * The enumeration type representing available methods for pulling query results + * from workers. + * @note The default method, if none was found in the configuration, would be SSI. + */ + enum class ResultDeliveryProtocol : int { + SSI = 0, ///< Pull data from the SSI stream (default) + XROOT = 1, ///< Use XROOTD file protocol + HTTP = 2 ///< Use HTTP protocol + }; + + /// @return the string representation of the protocol + /// @throw std::invalid_argument if the protocol is unknown + static std::string protocol2str(ResultDeliveryProtocol const& p); + + /** + * Create an instance of WorkerConfig and if a configuration file is provided then + * load parameters from the file. Otherwise create an object with default values + * of the parameters. + * @note One has to call this method at least once before trying to obtain + * a pointer of the instance by calling 'instance()'. The method 'create()' + * can be called many times. Only the first call creates an instance; subsequent calls + * return the instance already stored within the class. 
+ * @param configFileName - (optional) path to worker INI configuration file + * @return the shared pointer to the configuration object */ - explicit WorkerConfig(std::string configFileName) : WorkerConfig(util::ConfigStore(configFileName)) {} + static std::shared_ptr create(std::string const& configFileName = std::string()); + + /** + * Get a pointer to an instance that was created by a last call to + * the method 'create'. + * @return the shared pointer to the configuration object + * @throws std::logic_error when attempting to call the method before creating an instance. + */ + static std::shared_ptr instance(); WorkerConfig(WorkerConfig const&) = delete; WorkerConfig& operator=(WorkerConfig const&) = delete; - /* Get thread pool size for shared scans - * - * @return thread pool size for shared scans - */ + /// @return thread pool size for shared scans unsigned int getThreadPoolSize() const { return _threadPoolSize; } - /* Get the maximum number of threads the pool can have in - * existence at any given time. - */ + /// @return maximum number of threads the pool can have in existence at any given time unsigned int getMaxPoolThreads() const { return _maxPoolThreads; } - /* Get required number of completed tasks for table in a chunk for the average to be valid. - * - * @return required tasks completed before average time is valid. - */ + /// @return required number of tasks for table in a chunk for the average to be valid unsigned int getRequiredTasksCompleted() const { return _requiredTasksCompleted; } - /* Get the number of tasks that can be booted from a single user query. - * - * @return Maximum number of tasks that can be booted from a single user query. - */ + /// @return maximum number of tasks that can be booted from a single user query unsigned int getMaxTasksBootedPerUserQuery() const { return _maxTasksBootedPerUserQuery; } - /* Get maximum time in minutes for all tasks in a user query to finish for the fast scan. 
- * - * @return Maximum minutes for a user query to complete on the fast scan. - */ + /// @return maximum time for a user query to complete all tasks on the fast scan unsigned int getScanMaxMinutesFast() const { return _scanMaxMinutesFast; } - /* Get maximum time in minutes for all tasks in a user query to finish for the medium scan. - * - * @return Maximum minutes for a user query to complete on the medium scan. - */ + /// @return maximum time for a user query to complete all tasks on the medium scan unsigned int getScanMaxMinutesMed() const { return _scanMaxMinutesMed; } - /* Get maximum time in minutes for all tasks in a user query to finish for the slow scan. - * - * @return Maximum minutes for a user query to complete on the slow scan. - */ + /// @return maximum time for a user query to complete all tasks on the slow scan unsigned int getScanMaxMinutesSlow() const { return _scanMaxMinutesSlow; } - /* Get maximum time in minutes for all tasks in a user query to finish for the snail scan. - * - * @return Maximum minutes for a user query to complete on the snail scan. 
- */ + /// @return maximum time for a user query to complete all tasks on the snail scan unsigned int getScanMaxMinutesSnail() const { return _scanMaxMinutesSnail; } - /* Get maximum number of task accepted in a group queue - * - * @return maximum number of task accepted in a group queue - */ + /// @return maximum number of task accepted in a group queue unsigned int getMaxGroupSize() const { return _maxGroupSize; } - /* Get max thread reserve for fast shared scan - * - * @return max thread reserve for fast shared scan - */ + /// @return max thread reserve for fast shared scan unsigned int getMaxReserveFast() const { return _maxReserveFast; } - /* Get max thread reserve for medium shared scan - * - * @return max thread reserve for medium shared scan - */ + /// @return max thread reserve for medium shared scan unsigned int getMaxReserveMed() const { return _maxReserveMed; } - /* Get max thread reserve for slow shared scan - * - * @return max thread reserve for slow shared scan - */ + /// @return max thread reserve for slow shared scan unsigned int getMaxReserveSlow() const { return _maxReserveSlow; } - /* Get max thread reserve for snail shared scan - * - * @return max thread reserve for snail shared scan - */ + /// @return max thread reserve for snail shared scan unsigned int getMaxReserveSnail() const { return _maxReserveSnail; } - /* Get selected memory management implementation - * - * @return class name implementing selected memory management - */ + /// @return class name implementing selected memory management std::string const& getMemManClass() const { return _memManClass; } - /* Get path to directory where the Memory Manager database resides - * - * @return path to directory where the Memory Manager database resides - */ + /// @return path to directory where the Memory Manager database resides std::string const& getMemManLocation() const { return _memManLocation; } - /* Get maximum amount of memory that can be used by Memory Manager - * - * @return maximum 
amount of memory that can be used by Memory Manager - */ + /// @return maximum amount of memory that can be used by Memory Manager uint64_t getMemManSizeMb() const { return _memManSizeMb; } - /* Get MySQL configuration for worker MySQL instance - * - * @return a structure containing MySQL parameters - */ + /// @return a configuration for worker MySQL instance. mysql::MySqlConfig const& getMySqlConfig() const { return _mySqlConfig; } - /* Get fast shared scan priority - * - * @return fast shared scan priority - */ + /// @return fast shared scan priority unsigned int getPriorityFast() const { return _priorityFast; } - /* Get medium shared scan priority - * - * @return medium shared scan priority - */ + /// @return medium shared scan priority unsigned int getPriorityMed() const { return _priorityMed; } - /* Get slow shared scan priority - * - * @return slow shared scan priority - */ + /// @return slow shared scan priority unsigned int getPrioritySlow() const { return _prioritySlow; } - /* Get snail shared scan priority - * - * @return slow shared scan priority - */ + /// @return snail shared scan priority unsigned int getPrioritySnail() const { return _prioritySnail; } - /* Get maximum concurrent chunks for fast shared scan. - * - * @return fast shared scan maxActiveChunks. - */ + /// @return maximum concurrent chunks for fast shared scan unsigned int getMaxActiveChunksFast() const { return _maxActiveChunksFast; } - /* Get maximum concurrent chunks for medium shared scan. - * - * @return medium shared scan maxActiveChunks. - */ + /// @return maximum concurrent chunks for medium shared scan unsigned int getMaxActiveChunksMed() const { return _maxActiveChunksMed; } - /* Get maximum concurrent chunks for slow shared scan. - * - * @return slow shared scan maxActiveChunks. - */ + /// @return maximum concurrent chunks for slow shared scan unsigned int getMaxActiveChunksSlow() const { return _maxActiveChunksSlow; } - /* Get maximum concurrent chunks for snail shared scan. 
- * - * @return snail shared scan maxActiveChunks. - */ + /// @return maximum concurrent chunks for snail shared scan unsigned int getMaxActiveChunksSnail() const { return _maxActiveChunksSnail; } - /// @return the maximum number of SQL connections for tasks. + /// @return the maximum number of SQL connections for tasks unsigned int getMaxSqlConnections() const { return _maxSqlConnections; } - /// @return the number of SQL connections reserved for interactive tasks. + + /// @return the number of SQL connections reserved for interactive tasks unsigned int getReservedInteractiveSqlConnections() const { return _ReservedInteractiveSqlConnections; } - /// @return the maximum number of gigabytes that can be used by StreamBuffers. + /// @return the maximum number of gigabytes that can be used by StreamBuffers unsigned int getBufferMaxTotalGB() const { return _bufferMaxTotalGB; } - /// @return the maximum number of concurrent transmits to a czar. + /// @return the maximum number of concurrent transmits to a czar unsigned int getMaxTransmits() const { return _maxTransmits; } int getMaxPerQid() const { return _maxPerQid; } - /** Overload output operator for current class - * - * @param out - * @param workerConfig - * @return an output stream + /// @return the name of a folder where query results will be stored + std::string const& resultsDirname() const { return _resultsDirname; } + + /// @return the port number of the worker XROOTD service for serving result files + uint16_t resultsXrootdPort() const { return _resultsXrootdPort; } + + /// @return the number of the BOOST ASIO threads for servicing HTTP requests + size_t resultsNumHttpThreads() const { return _resultsNumHttpThreads; } + + /// @return the result delivery method + ResultDeliveryProtocol resultDeliveryProtocol() const { return _resultDeliveryProtocol; } + + /// @return 'true' if result files (if any) left after the previous run of the worker + /// had to be deleted from the corresponding folder. 
+ bool resultsCleanUpOnStart() const { return _resultsCleanUpOnStart; } + + /** + * Dump the configuration object onto the output stream. + * @param out - the output stream object + * @param workerConfig - worker configuration object + * @return the output stream object */ friend std::ostream& operator<<(std::ostream& out, WorkerConfig const& workerConfig); private: + /// Initialize parameters with default values + WorkerConfig(); + + /// Initialize parameters from the configuration store + /// @param configStore WorkerConfig(util::ConfigStore const& configStore); + /// This mutex protects the static member _instance. + static std::mutex _mtxOnInstance; + + /// The configuration object created by the method 'create'. + static std::shared_ptr _instance; + mysql::MySqlConfig _mySqlConfig; std::string const _memManClass; @@ -267,6 +252,11 @@ class WorkerConfig { unsigned int const _bufferMaxTotalGB; unsigned int const _maxTransmits; int const _maxPerQid; + std::string const _resultsDirname; + uint16_t const _resultsXrootdPort; + size_t const _resultsNumHttpThreads; + ResultDeliveryProtocol const _resultDeliveryProtocol; + bool const _resultsCleanUpOnStart; }; } // namespace lsst::qserv::wconfig diff --git a/src/wdb/testQueryRunner.cc b/src/wdb/testQueryRunner.cc index 175e092be1..d944a93db2 100644 --- a/src/wdb/testQueryRunner.cc +++ b/src/wdb/testQueryRunner.cc @@ -33,6 +33,7 @@ #include "util/StringHash.h" #include "wbase/SendChannelShared.h" #include "wbase/Task.h" +#include "wconfig/WorkerConfig.h" #include "wcontrol/SqlConnMgr.h" #include "wcontrol/TransmitMgr.h" #include "wdb/ChunkResource.h" @@ -61,7 +62,8 @@ using lsst::qserv::proto::TaskMsg_Subchunk; using lsst::qserv::wbase::SendChannel; using lsst::qserv::wbase::SendChannelShared; using lsst::qserv::wbase::Task; -using lsst::qserv::wcontrol::SqlConnMgr; +using lsst::qserv::wbase::SendChannel; +using lsst::qserv::wconfig::WorkerConfig; using lsst::qserv::wcontrol::TransmitMgr; using 
lsst::qserv::wdb::ChunkResource; using lsst::qserv::wdb::ChunkResourceMgr; @@ -105,6 +107,7 @@ BOOST_AUTO_TEST_CASE(Simple) { shared_ptr msg(newTaskMsg()); shared_ptr sendC(SendChannel::newNopChannel()); auto sc = SendChannelShared::create(sendC, locTransmitMgr, 1); + WorkerConfig::create(); auto taskVect = Task::createTasks(msg, sc); Task::Ptr task = taskVect[0]; FakeBackend::Ptr backend = make_shared(); @@ -119,6 +122,7 @@ BOOST_AUTO_TEST_CASE(Output) { shared_ptr msg(newTaskMsg()); shared_ptr sendC(SendChannel::newStringChannel(out)); auto sc = SendChannelShared::create(sendC, locTransmitMgr, 1); + WorkerConfig::create(); auto taskVect = Task::createTasks(msg, sc); Task::Ptr task = taskVect[0]; FakeBackend::Ptr backend = make_shared(); diff --git a/src/wsched/testSchedulers.cc b/src/wsched/testSchedulers.cc index 017874fc34..7979cd7028 100644 --- a/src/wsched/testSchedulers.cc +++ b/src/wsched/testSchedulers.cc @@ -38,6 +38,7 @@ #include "util/EventThread.h" #include "wbase/SendChannelShared.h" #include "wbase/Task.h" +#include "wconfig/WorkerConfig.h" #include "wcontrol/TransmitMgr.h" #include "wpublish/QueriesAndChunks.h" #include "wsched/ChunkTasksQueue.h" @@ -62,6 +63,7 @@ using lsst::qserv::proto::TaskMsg; using lsst::qserv::wbase::SendChannel; using lsst::qserv::wbase::SendChannelShared; using lsst::qserv::wbase::Task; +using lsst::qserv::wconfig::WorkerConfig; double const oneHr = 60.0; @@ -74,6 +76,7 @@ Task::Ptr makeTask(std::shared_ptr tm) { auto sendC = std::make_shared(); auto sc = SendChannelShared::create(sendC, locTransmitMgr, 1); locSendSharedPtrs.push_back(sc); + WorkerConfig::create(); auto taskVect = Task::createTasks(tm, sc); Task::Ptr task = taskVect[0]; task->setSafeToMoveRunning(true); // Can't wait for MemMan in unit tests. 
diff --git a/src/xrdsvc/SsiProvider.cc b/src/xrdsvc/SsiProvider.cc index 3206ff9a15..9aae72f20d 100644 --- a/src/xrdsvc/SsiProvider.cc +++ b/src/xrdsvc/SsiProvider.cc @@ -90,8 +90,8 @@ bool SsiProviderServer::Init(XrdSsiLogger* logP, XrdSsiCluster* clsP, std::strin LOGS(_log, LOG_LVL_DEBUG, "Qserv xrdssi plugin configuration file: " << argv[1]); std::string workerConfigFile = argv[1]; - wconfig::WorkerConfig workerConfig(workerConfigFile); - LOGS(_log, LOG_LVL_DEBUG, "Qserv xrdssi plugin configuration: " << workerConfig); + auto const workerConfig = wconfig::WorkerConfig::create(workerConfigFile); + LOGS(_log, LOG_LVL_DEBUG, "Qserv xrdssi plugin configuration: " << *workerConfig); // Save the ssi logger as it places messages in another file than our log. // @@ -118,7 +118,7 @@ bool SsiProviderServer::Init(XrdSsiLogger* logP, XrdSsiCluster* clsP, std::strin // calls either in the data provider and the metadata provider (we can be // either one). // - _chunkInventory.init(x.getName(), workerConfig.getMySqlConfig()); + _chunkInventory.init(x.getName(), workerConfig->getMySqlConfig()); // If we are a data provider (i.e. xrootd) then we need to get the service // object. It will print the exported paths. Otherwise, we need to print @@ -126,7 +126,7 @@ bool SsiProviderServer::Init(XrdSsiLogger* logP, XrdSsiCluster* clsP, std::strin // single shared memory inventory object which should do this by itself. 
// if (clsP && clsP->DataContext()) { - _service.reset(new SsiService(logP, workerConfig)); + _service.reset(new SsiService(logP)); } else { std::ostringstream ss; ss << "Provider valid paths(ci): "; diff --git a/src/xrdsvc/SsiService.cc b/src/xrdsvc/SsiService.cc index 546ae5f643..16f6ffcfae 100644 --- a/src/xrdsvc/SsiService.cc +++ b/src/xrdsvc/SsiService.cc @@ -76,7 +76,7 @@ int dummyInitMDC = LOG_MDC_INIT(initMDC); namespace lsst::qserv::xrdsvc { SsiService::SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerConfig) - : _mySqlConfig(workerConfig.getMySqlConfig()) { + : _mySqlConfig(wconfig::WorkerConfig::instance()->getMySqlConfig()) { LOGS(_log, LOG_LVL_DEBUG, "SsiService starting..."); util::HoldTrack::setup(10min); @@ -87,16 +87,17 @@ SsiService::SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerCon } _initInventory(); - string cfgMemMan = workerConfig.getMemManClass(); + auto const workerConfig = wconfig::WorkerConfig::instance(); + string cfgMemMan = workerConfig->getMemManClass(); memman::MemMan::Ptr memMan; if (cfgMemMan == "MemManReal") { // Default to 1 gigabyte - uint64_t memManSize = workerConfig.getMemManSizeMb() * 1000000; + uint64_t memManSize = workerConfig->getMemManSizeMb() * 1000000; LOGS(_log, LOG_LVL_DEBUG, - "Using MemManReal with memManSizeMb=" << workerConfig.getMemManSizeMb() - << " location=" << workerConfig.getMemManLocation()); + "Using MemManReal with memManSizeMb=" << workerConfig->getMemManSizeMb() + << " location=" << workerConfig->getMemManLocation()); memMan = shared_ptr( - memman::MemMan::create(memManSize, workerConfig.getMemManLocation())); + memman::MemMan::create(memManSize, workerConfig->getMemManLocation())); } else if (cfgMemMan == "MemManNone") { memMan = make_shared(1, false); } else { @@ -104,18 +105,18 @@ SsiService::SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerCon throw wconfig::WorkerConfigError("Unrecognized memory manager."); } - int64_t bufferMaxTotalBytes = 
workerConfig.getBufferMaxTotalGB() * 1'000'000'000LL; + int64_t bufferMaxTotalBytes = workerConfig->getBufferMaxTotalGB() * 1'000'000'000LL; StreamBuffer::setMaxTotalBytes(bufferMaxTotalBytes); // Set thread pool size. - unsigned int poolSize = max(workerConfig.getThreadPoolSize(), thread::hardware_concurrency()); - unsigned int maxPoolThreads = max(workerConfig.getMaxPoolThreads(), poolSize); + unsigned int poolSize = max(workerConfig->getThreadPoolSize(), thread::hardware_concurrency()); + unsigned int maxPoolThreads = max(workerConfig->getMaxPoolThreads(), poolSize); // poolSize should be greater than either GroupScheduler::maxThreads or ScanScheduler::maxThreads unsigned int maxThread = poolSize; int maxReserve = 2; auto group = make_shared("SchedGroup", maxThread, maxReserve, - workerConfig.getMaxGroupSize(), + workerConfig->getMaxGroupSize(), wsched::SchedulerBase::getMaxPriority()); int const fastest = lsst::qserv::proto::ScanInfo::Rating::FASTEST; @@ -123,26 +124,28 @@ SsiService::SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerCon int const medium = lsst::qserv::proto::ScanInfo::Rating::MEDIUM; int const slow = lsst::qserv::proto::ScanInfo::Rating::SLOW; int const slowest = lsst::qserv::proto::ScanInfo::Rating::SLOWEST; - double fastScanMaxMinutes = (double)workerConfig.getScanMaxMinutesFast(); - double medScanMaxMinutes = (double)workerConfig.getScanMaxMinutesMed(); - double slowScanMaxMinutes = (double)workerConfig.getScanMaxMinutesSlow(); - double snailScanMaxMinutes = (double)workerConfig.getScanMaxMinutesSnail(); - int maxTasksBootedPerUserQuery = workerConfig.getMaxTasksBootedPerUserQuery(); + double fastScanMaxMinutes = (double)workerConfig->getScanMaxMinutesFast(); + double medScanMaxMinutes = (double)workerConfig->getScanMaxMinutesMed(); + double slowScanMaxMinutes = (double)workerConfig->getScanMaxMinutesSlow(); + double snailScanMaxMinutes = (double)workerConfig->getScanMaxMinutesSnail(); + int maxTasksBootedPerUserQuery = 
workerConfig->getMaxTasksBootedPerUserQuery(); vector scanSchedulers{ + make_shared("SchedSlow", maxThread, workerConfig->getMaxReserveSlow(), + workerConfig->getPrioritySlow(), + workerConfig->getMaxActiveChunksSlow(), memMan, medium + 1, + slow, slowScanMaxMinutes), + make_shared("SchedFast", maxThread, workerConfig->getMaxReserveFast(), + workerConfig->getPriorityFast(), + workerConfig->getMaxActiveChunksFast(), memMan, fastest, fast, + fastScanMaxMinutes), make_shared( - "SchedSlow", maxThread, workerConfig.getMaxReserveSlow(), workerConfig.getPrioritySlow(), - workerConfig.getMaxActiveChunksSlow(), memMan, medium + 1, slow, slowScanMaxMinutes), - make_shared( - "SchedFast", maxThread, workerConfig.getMaxReserveFast(), workerConfig.getPriorityFast(), - workerConfig.getMaxActiveChunksFast(), memMan, fastest, fast, fastScanMaxMinutes), - make_shared( - "SchedMed", maxThread, workerConfig.getMaxReserveMed(), workerConfig.getPriorityMed(), - workerConfig.getMaxActiveChunksMed(), memMan, fast + 1, medium, medScanMaxMinutes), + "SchedMed", maxThread, workerConfig->getMaxReserveMed(), workerConfig->getPriorityMed(), + workerConfig->getMaxActiveChunksMed(), memMan, fast + 1, medium, medScanMaxMinutes), }; auto snail = make_shared( - "SchedSnail", maxThread, workerConfig.getMaxReserveSnail(), workerConfig.getPrioritySnail(), - workerConfig.getMaxActiveChunksSnail(), memMan, slow + 1, slowest, snailScanMaxMinutes); + "SchedSnail", maxThread, workerConfig->getMaxReserveSnail(), workerConfig->getPrioritySnail(), + workerConfig->getMaxActiveChunksSnail(), memMan, slow + 1, slowest, snailScanMaxMinutes); wpublish::QueriesAndChunks::Ptr queries = wpublish::QueriesAndChunks::setupGlobal( chrono::minutes(5), chrono::minutes(5), maxTasksBootedPerUserQuery); @@ -151,22 +154,22 @@ SsiService::SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerCon blendSched->setPrioritizeByInFlight(false); // TODO: set in configuration file. 
queries->setBlendScheduler(blendSched); - unsigned int requiredTasksCompleted = workerConfig.getRequiredTasksCompleted(); + unsigned int requiredTasksCompleted = workerConfig->getRequiredTasksCompleted(); queries->setRequiredTasksCompleted(requiredTasksCompleted); - int const maxSqlConn = workerConfig.getMaxSqlConnections(); - int const resvInteractiveSqlConn = workerConfig.getReservedInteractiveSqlConnections(); + int const maxSqlConn = workerConfig->getMaxSqlConnections(); + int const resvInteractiveSqlConn = workerConfig->getReservedInteractiveSqlConnections(); auto sqlConnMgr = make_shared(maxSqlConn, maxSqlConn - resvInteractiveSqlConn); LOGS(_log, LOG_LVL_WARN, "config sqlConnMgr" << *sqlConnMgr); - int const maxTransmits = workerConfig.getMaxTransmits(); - int const maxPerQid = workerConfig.getMaxPerQid(); + int const maxTransmits = workerConfig->getMaxTransmits(); + int const maxPerQid = workerConfig->getMaxPerQid(); _transmitMgr = make_shared(maxTransmits, maxPerQid); LOGS(_log, LOG_LVL_WARN, "config transmitMgr" << *_transmitMgr); LOGS(_log, LOG_LVL_WARN, "maxPoolThreads=" << maxPoolThreads); _foreman = make_shared(blendSched, poolSize, maxPoolThreads, - workerConfig.getMySqlConfig(), queries, sqlConnMgr); + _mySqlConfig, queries, sqlConnMgr); // Watch to see if the log configuration is changed. 
// If LSST_LOG_CONFIG is not defined, there's no good way to know what log diff --git a/src/xrdsvc/SsiService.h b/src/xrdsvc/SsiService.h index 1ceb677dd7..aa54c18245 100644 --- a/src/xrdsvc/SsiService.h +++ b/src/xrdsvc/SsiService.h @@ -33,7 +33,6 @@ // Qserv headers #include "mysql/MySqlConfig.h" -#include "wconfig/WorkerConfig.h" // Forward declarations class XrdSsiLogger; @@ -57,14 +56,13 @@ namespace lsst::qserv::xrdsvc { /// worker services class SsiService : public XrdSsiService { public: - /** Build a SsiService object - * + /** + * Build a SsiService object * @param log xrdssi logger - * @param config SSiservice configuration parameters */ // take ownership of logger for now - SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerConfig); + SsiService(XrdSsiLogger* log); virtual ~SsiService(); /// Called by SSI framework to handle new requests From fab008216817e171fe11851ec490433d7decdff0 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Tue, 7 Mar 2023 07:52:18 +0000 Subject: [PATCH 3/8] Extended Protobuf definition to return file resource locations Also eliminated deprecated protobuf schema and large result attributes --- src/ccontrol/MergingHandler.cc | 9 +++----- src/ccontrol/MergingHandler.h | 3 +-- src/proto/FakeProtocolFixture.h | 1 - src/proto/worker.proto | 10 +++++++-- src/qdisp/Executive.h | 1 - src/qdisp/QueryRequest.cc | 19 +++++----------- src/qdisp/QueryRequest.h | 3 +-- src/qdisp/ResponseHandler.h | 6 ++--- src/qdisp/XrdSsiMocks.cc | 1 - src/rproc/InfileMerger.cc | 6 ++--- src/rproc/InfileMerger.h | 4 ---- src/rproc/ProtoRowBuffer.cc | 38 +------------------------------- src/rproc/ProtoRowBuffer.h | 3 --- src/wbase/TransmitData.cc | 18 +++++---------- src/wbase/TransmitData.h | 39 +++++++-------------------------- src/wdb/QueryRunner.h | 2 -- 16 files changed, 36 insertions(+), 127 deletions(-) diff --git a/src/ccontrol/MergingHandler.cc b/src/ccontrol/MergingHandler.cc index 5b3f14102b..3b30922d63 100644 ---
a/src/ccontrol/MergingHandler.cc +++ b/src/ccontrol/MergingHandler.cc @@ -86,8 +86,7 @@ const char* MergingHandler::getStateStr(MsgState const& state) { return "unknown"; } -bool MergingHandler::flush(int bLen, BufPtr const& bufPtr, bool& last, bool& largeResult, int& nextBufSize, - int& resultRows) { +bool MergingHandler::flush(int bLen, BufPtr const& bufPtr, bool& last, int& nextBufSize, int& resultRows) { LOGS(_log, LOG_LVL_DEBUG, "From:" << _wName << " flush state=" << getStateStr(_state) << " blen=" << bLen << " last=" << last); resultRows = 0; @@ -112,7 +111,6 @@ bool MergingHandler::flush(int bLen, BufPtr const& bufPtr, bool& last, bool& lar { nextBufSize = _response->protoHeader.size(); - largeResult = _response->protoHeader.largeresult(); bool endNoData = _response->protoHeader.endnodata(); int seq = -1; int scsSeq = -1; @@ -123,9 +121,8 @@ bool MergingHandler::flush(int bLen, BufPtr const& bufPtr, bool& last, bool& lar scsSeq = _response->protoHeader.scsseq(); } LOGS(_log, LOG_LVL_DEBUG, - "HEADER_WAIT: From:" << _wName << " nextBufSize=" << nextBufSize - << " largeResult=" << largeResult << " endNoData=" << endNoData - << " seq=" << seq << " scsseq=" << scsSeq); + "HEADER_WAIT: From:" << _wName << " nextBufSize=" << nextBufSize << " endNoData=" + << endNoData << " seq=" << seq << " scsseq=" << scsSeq); _state = MsgState::RESULT_WAIT; if (endNoData || nextBufSize == 0) { diff --git a/src/ccontrol/MergingHandler.h b/src/ccontrol/MergingHandler.h index dc5bff13ee..bddbdbd735 100644 --- a/src/ccontrol/MergingHandler.h +++ b/src/ccontrol/MergingHandler.h @@ -72,8 +72,7 @@ class MergingHandler : public qdisp::ResponseHandler { /// Flush the retrieved buffer where bLen bytes were set. If last==true, /// then no more buffer() and flush() calls should occur. 
/// @return true if successful (no error) - bool flush(int bLen, BufPtr const& bufPtr, bool& last, bool& largeResult, int& nextBufSize, - int& resultRows) override; + bool flush(int bLen, BufPtr const& bufPtr, bool& last, int& nextBufSize, int& resultRows) override; /// Signal an unrecoverable error condition. No further calls are expected. void errorFlush(std::string const& msg, int code) override; diff --git a/src/proto/FakeProtocolFixture.h b/src/proto/FakeProtocolFixture.h index d9980ea7ad..e5da34c81a 100644 --- a/src/proto/FakeProtocolFixture.h +++ b/src/proto/FakeProtocolFixture.h @@ -89,7 +89,6 @@ class FakeProtocolFixture { p->set_protocol(2); p->set_size(500); p->set_md5(std::string("1234567890abcdef0")); - p->set_largeresult(false); p->set_endnodata(false); return p; } diff --git a/src/proto/worker.proto b/src/proto/worker.proto index e4b6fd7737..d61459cb6c 100644 --- a/src/proto/worker.proto +++ b/src/proto/worker.proto @@ -90,20 +90,24 @@ message ProtoHeader { optional sfixed32 size = 2; // protobufs discourages messages > megabytes optional bytes md5 = 3; optional string wname = 4; - optional bool largeresult = 5; + optional bool largeresult = 5; // DEPRECATED optional bool endnodata = 6; // True if this header is the end, no more data. size should be 0. optional uint32 seq = 7; // sequence number from SendChannel optional int32 scsseq = 8; // sequence number from SendChannelShared, can be -1 } +// DEPRECATED message ColumnSchema { optional string name = 1; // Optional to allow type-only transmission optional string sqltype = 2; optional int32 mysqltype = 3; } + +// DEPRECATED message RowSchema { repeated ColumnSchema columnschema = 1; } + message RowBundle { repeated bytes column = 1; // bytes to allow BLOB encoding repeated bool isnull = 2; // Flag to allow sending nulls. 
@@ -111,7 +115,7 @@ message RowBundle { message Result { optional int64 session = 1; - optional RowSchema rowschema = 2; + optional RowSchema rowschema = 2; // DEPRECATED optional int32 errorcode = 3; optional string errormsg = 4; repeated RowBundle row = 5; @@ -120,6 +124,8 @@ message Result { optional uint32 rowcount = 8; optional uint64 transmitsize = 9; optional int32 attemptcount = 10; + optional string fileresource_xroot = 11; /// XROOTD url for the result file + optional string fileresource_http = 12; /// HTTP url for the result file } // Result protocol 2: diff --git a/src/qdisp/Executive.h b/src/qdisp/Executive.h index d1318713a7..136967c298 100644 --- a/src/qdisp/Executive.h +++ b/src/qdisp/Executive.h @@ -64,7 +64,6 @@ class QuerySession; namespace qdisp { class JobQuery; -class LargeResultMgr; class MessageStore; class PseudoFifo; diff --git a/src/qdisp/QueryRequest.cc b/src/qdisp/QueryRequest.cc index d8741d2d74..40490377cf 100644 --- a/src/qdisp/QueryRequest.cc +++ b/src/qdisp/QueryRequest.cc @@ -357,12 +357,10 @@ bool QueryRequest::_importStream(JobQuery::Ptr const& jq) { ResponseHandler::BufPtr bufPtr = make_shared>(buff, buff + len); // Use `flush()` to read the buffer and extract the header. - bool largeResult = false; int nextBufSize = 0; bool last = false; int resultRows = 0; - bool flushOk = jq->getDescription()->respHandler()->flush(len, bufPtr, last, largeResult, nextBufSize, - resultRows); + bool flushOk = jq->getDescription()->respHandler()->flush(len, bufPtr, last, nextBufSize, resultRows); if (!flushOk) { LOGS(_log, LOG_LVL_ERROR, "_importStream not flushOk"); @@ -512,7 +510,6 @@ void QueryRequest::_processData(JobQuery::Ptr const& jq, int blen, bool xrdLast) ResponseHandler::BufPtr nextHeaderBufPtr; // Values for these variables to be filled in by flush() calls. 
- bool largeResult = false; int nextBufSize = 0; int resultRows = 0; bool last = false; @@ -525,8 +522,8 @@ void QueryRequest::_processData(JobQuery::Ptr const& jq, int blen, bool xrdLast) int respSize = blen - protoHeaderSize; nextHeaderBufPtr = make_shared>(bufPtr->begin() + respSize, bufPtr->end()); // Read the result - // Values for last, largeResult, and nextBufSize filled in by flush - flushOk = jq->getRespHandler()->flush(respSize, bufPtr, last, largeResult, nextBufSize, resultRows); + // Values for last, and nextBufSize filled in by flush + flushOk = jq->getRespHandler()->flush(respSize, bufPtr, last, nextBufSize, resultRows); if (last) { // Last should only be true when the header is read, not the result. throw util::Bug(ERR_LOC, "_processData result had 'last' true, which cannot be allowed."); @@ -545,15 +542,9 @@ void QueryRequest::_processData(JobQuery::Ptr const& jq, int blen, bool xrdLast) } // Read the next header - // Values for last, largeResult, and nextBufSize filled in by flush + // Values for last, and nextBufSize filled in by flush // resultRows is ignored in headers, and should always be 0. - flushOk = jq->getRespHandler()->flush(protoHeaderSize, nextHeaderBufPtr, last, largeResult, nextBufSize, - resultRows); - - if (largeResult) { - if (!_largeResult) LOGS(_log, LOG_LVL_DEBUG, "holdState largeResult set to true"); - _largeResult = true; // Once the worker indicates it's a large result, it stays that way. - } + flushOk = jq->getRespHandler()->flush(protoHeaderSize, nextHeaderBufPtr, last, nextBufSize, resultRows); if (flushOk) { if (last != xrdLast) { diff --git a/src/qdisp/QueryRequest.h b/src/qdisp/QueryRequest.h index 9cefdb74ca..eb6276909f 100644 --- a/src/qdisp/QueryRequest.h +++ b/src/qdisp/QueryRequest.h @@ -133,7 +133,7 @@ class QueryRequest : public XrdSsiRequest, public std::enable_shared_from_this _finishedCalled{false}; - bool _largeResult{false}; ///< True if the worker flags this job as having a large result. 
QdispPool::Ptr _qdispPool; std::shared_ptr _askForResponseDataCmd; diff --git a/src/qdisp/ResponseHandler.h b/src/qdisp/ResponseHandler.h index 1f721ebd09..6a8c951008 100644 --- a/src/qdisp/ResponseHandler.h +++ b/src/qdisp/ResponseHandler.h @@ -55,13 +55,11 @@ class ResponseHandler { /// Flush the retrieved buffer where bLen bytes were set. If last==true, /// then no more buffer() and flush() calls should occur. /// @return true if successful (no error) - /// last, largeResult, nextBufSize, and resultRows are set in flush. + /// last, nextBufSize, and resultRows are set in flush. /// last - true if no more messages for this job. - /// largeResult - true if there is more than 1 message in the result. /// nextBufSize - size of the next buffer /// resultRows - number of result rows in this result. - virtual bool flush(int bLen, BufPtr const& bufPtr, bool& last, bool& largeResult, int& nextBufSize, - int& resultRows) = 0; + virtual bool flush(int bLen, BufPtr const& bufPtr, bool& last, int& nextBufSize, int& resultRows) = 0; /// Signal an unrecoverable error condition. No further calls are expected. 
virtual void errorFlush(std::string const& msg, int code) = 0; diff --git a/src/qdisp/XrdSsiMocks.cc b/src/qdisp/XrdSsiMocks.cc index 1ed41a5fde..dfcbd96c19 100644 --- a/src/qdisp/XrdSsiMocks.cc +++ b/src/qdisp/XrdSsiMocks.cc @@ -142,7 +142,6 @@ class Agent : public XrdSsiResponder, public XrdSsiStream { ph->set_size(0); ph->set_md5(std::string("d41d8cd98f00b204e9800998ecf8427")); ph->set_wname("localhost"); - ph->set_largeresult(false); ph->set_endnodata(true); std::string pHdrString; ph->SerializeToString(&pHdrString); diff --git a/src/rproc/InfileMerger.cc b/src/rproc/InfileMerger.cc index 21929ebda2..8a2bacce8c 100644 --- a/src/rproc/InfileMerger.cc +++ b/src/rproc/InfileMerger.cc @@ -206,15 +206,13 @@ bool InfileMerger::merge(std::shared_ptr const& response) } // TODO: Check session id (once session id mgmt is implemented) if (not(response->result.has_jobid() && response->result.has_rowcount() && - response->result.has_transmitsize() && response->result.has_attemptcount() && - response->result.has_rowschema())) { + response->result.has_transmitsize() && response->result.has_attemptcount())) { LOGS(_log, LOG_LVL_ERROR, "merge response missing required field" << " jobid:" << response->result.has_jobid() << " rowcount:" << response->result.has_rowcount() << " transmitsize:" << response->result.has_transmitsize() - << " attemptcount:" << response->result.has_attemptcount() - << " rowschema:" << response->result.has_rowschema()); + << " attemptcount:" << response->result.has_attemptcount()); return false; } int const jobId = response->result.jobid(); diff --git a/src/rproc/InfileMerger.h b/src/rproc/InfileMerger.h index 56c1ae7bc9..bf4aa90aea 100644 --- a/src/rproc/InfileMerger.h +++ b/src/rproc/InfileMerger.h @@ -177,10 +177,6 @@ class InfileMerger { std::string engineToStr(InfileMerger::DbEngine engine); - /// Create the shared thread pool and/or change its size. - // @return the size of the large result thread pool. 
- static int setLargeResultPoolSize(int size); - /// Merge a worker response, which contains: /// Size of ProtoHeader message /// ProtoHeader message diff --git a/src/rproc/ProtoRowBuffer.cc b/src/rproc/ProtoRowBuffer.cc index 7248ecac7b..91260e4aec 100644 --- a/src/rproc/ProtoRowBuffer.cc +++ b/src/rproc/ProtoRowBuffer.cc @@ -77,7 +77,6 @@ ProtoRowBuffer::ProtoRowBuffer(proto::Result& res, int jobId, std::string const& _jobIdSqlType(jobIdSqlType), _jobIdMysqlType(jobIdMysqlType) { _jobIdStr = std::string("'") + std::to_string(jobId) + "'"; - _initSchema(); if (_result.row_size() > 0) { _initCurrentRow(); } @@ -103,43 +102,8 @@ unsigned ProtoRowBuffer::fetch(char* buffer, unsigned bufLen) { return fetched; } -/// Import schema from the proto message into a Schema object -void ProtoRowBuffer::_initSchema() { - _schema.columns.clear(); - - // Set jobId and attemptCount - sql::ColSchema jobIdCol; - jobIdCol.name = _jobIdColName; - jobIdCol.colType.sqlType = _jobIdSqlType; - jobIdCol.colType.mysqlType = _jobIdMysqlType; - _schema.columns.push_back(jobIdCol); - - proto::RowSchema const& prs = _result.rowschema(); - for (int i = 0, e = prs.columnschema_size(); i != e; ++i) { - proto::ColumnSchema const& pcs = prs.columnschema(i); - sql::ColSchema cs; - if (pcs.has_name()) { - cs.name = pcs.name(); - } - if (not pcs.has_sqltype()) { - throw util::Bug(ERR_LOC, "_initSchema _result missing sqltype"); - } - cs.colType.sqlType = pcs.sqltype(); - if (pcs.has_mysqltype()) { - cs.colType.mysqlType = pcs.mysqltype(); - } - _schema.columns.push_back(cs); - } -} - std::string ProtoRowBuffer::dump() const { - std::string str("ProtoRowBuffer schema("); - for (auto sCol : _schema.columns) { - str += "(Name=" + sCol.name; - str += ",colType=" + sCol.colType.sqlType + ":" + std::to_string(sCol.colType.mysqlType) + ")"; - } - str += ") "; - str += "Row " + std::to_string(_rowIdx) + "("; + std::string str("ProtoRowBuffer Row " + std::to_string(_rowIdx) + "("); str += 
printCharVect(_currentRow); str += ")"; return str; diff --git a/src/rproc/ProtoRowBuffer.h b/src/rproc/ProtoRowBuffer.h index 9acccdb57f..03862f5ec7 100644 --- a/src/rproc/ProtoRowBuffer.h +++ b/src/rproc/ProtoRowBuffer.h @@ -29,7 +29,6 @@ // Qserv headers #include "mysql/RowBuffer.h" #include "proto/worker.pb.h" -#include "sql/Schema.h" namespace lsst::qserv::rproc { @@ -125,7 +124,6 @@ class ProtoRowBuffer : public mysql::RowBuffer { private: void _initCurrentRow(); - void _initSchema(); void _readNextRow(); // Copy a row bundle into a destination STL char container template @@ -149,7 +147,6 @@ class ProtoRowBuffer : public mysql::RowBuffer { std::string _nullToken; ///< Null indicator (e.g. \N) proto::Result& _result; ///< Ref to Resultmessage - sql::Schema _schema; ///< Schema object int _rowIdx; ///< Row index int _rowTotal; ///< Total row count std::vector _currentRow; ///< char buffer representing current row. diff --git a/src/wbase/TransmitData.cc b/src/wbase/TransmitData.cc index 044e337f86..936a8eec12 100644 --- a/src/wbase/TransmitData.cc +++ b/src/wbase/TransmitData.cc @@ -36,6 +36,7 @@ #include "global/LogContext.h" #include "proto/ProtoHeaderWrap.h" #include "util/Bug.h" +#include "util/InstanceCount.h" #include "util/MultiError.h" #include "util/StringHash.h" #include "wbase/Task.h" @@ -71,7 +72,6 @@ proto::ProtoHeader* TransmitData::_createHeader() { hdr->set_size(0); hdr->set_md5(util::StringHash::getMd5("", 0)); hdr->set_wname(getHostname()); - hdr->set_largeresult(false); hdr->set_endnodata(true); return hdr; } @@ -146,23 +146,17 @@ xrdsvc::StreamBuffer::Ptr TransmitData::getStreamBuffer(Task::Ptr const& task) { return xrdsvc::StreamBuffer::createWithMove(_dataMsg, task); } -void TransmitData::_buildHeader(bool largeResult) { +void TransmitData::_buildHeader(lock_guard const& lock) { LOGS(_log, LOG_LVL_DEBUG, _idStr << "TransmitData::_buildHeader"); // The size of the dataMsg must include space for the header for the next dataMsg. 
_header->set_size(_dataMsg.size() + proto::ProtoHeaderWrap::getProtoHeaderSize()); // The md5 hash must not include the header for the next dataMsg. _header->set_md5(util::StringHash::getMd5(_dataMsg.data(), _dataMsg.size())); - _header->set_largeresult(largeResult); _header->set_endnodata(false); } -void TransmitData::buildDataMsg(Task const& task, bool largeResult, util::MultiError& multiErr) { - lock_guard lock(_trMtx); - _buildDataMsg(task, largeResult, multiErr); -} - -void TransmitData::_buildDataMsg(Task const& task, bool largeResult, util::MultiError& multiErr) { +void TransmitData::buildDataMsg(Task const& task, util::MultiError& multiErr) { QSERV_LOGCONTEXT_QUERY_JOB(task.getQueryId(), task.getJobId()); LOGS(_log, LOG_LVL_INFO, _idStr << "TransmitData::_buildDataMsg rowCount=" << _rowCount << " tSize=" << _tSize); @@ -181,16 +175,14 @@ void TransmitData::_buildDataMsg(Task const& task, bool largeResult, util::Multi _result->SerializeToString(&_dataMsg); // Build the header for this message, but this message can't be transmitted until the // next header has been built and appended to _transmitData->dataMsg. That happens - // later in SendChannelShared. - _buildHeader(largeResult); + // later in ChannelShared. 
+ _buildHeader(lock); } void TransmitData::initResult(Task& task, std::vector& schemaCols) { lock_guard lock(_trMtx); _result->set_queryid(task.getQueryId()); _result->set_jobid(task.getJobId()); - _result->mutable_rowschema(); - if (task.getSession() >= 0) { _result->set_session(task.getSession()); } diff --git a/src/wbase/TransmitData.h b/src/wbase/TransmitData.h index 29f991c8e5..5c1e9f1cea 100644 --- a/src/wbase/TransmitData.h +++ b/src/wbase/TransmitData.h @@ -24,25 +24,24 @@ // System headers #include #include +#include #include // 3rd party headers #include // Qserv headers -#include "proto/ProtoHeaderWrap.h" +#include "proto/worker.pb.h" #include "qmeta/types.h" namespace google::protobuf { class Arena; } // namespace google::protobuf -// This header declarations -namespace lsst::qserv { - -namespace util { +namespace lsst::qserv::util { +class InstanceCount; class MultiError; -} +} // namespace lsst::qserv::util namespace xrdsvc { class StreamBuffer; @@ -52,19 +51,6 @@ namespace wbase { class Task; -/// This class stores properties for one column in the schema. -class SchemaCol { -public: - SchemaCol() = default; - SchemaCol(SchemaCol const&) = default; - SchemaCol& operator=(SchemaCol const&) = default; - SchemaCol(std::string name, std::string sqltype, int mysqltype) - : colName(name), colSqlType(sqltype), colMysqlType(mysqltype) {} - std::string colName; - std::string colSqlType; ///< sqltype for the column - int colMysqlType = 0; ///< MySQL type number -}; - /// This class is used to store information needed for one transmit. /// The data may be for result rows or an error message. class TransmitData { @@ -116,7 +102,7 @@ class TransmitData { void addSchemaCols(std::vector& schemaCols); /// Use the information collected in _result and multiErr to build _dataMsg. 
- void buildDataMsg(Task const& task, bool largeResult, util::MultiError& multiErr); + void buildDataMsg(Task const& task, util::MultiError& multiErr); /// @return true if tData has an error message in _result. bool hasErrormsg() const; @@ -150,17 +136,8 @@ class TransmitData { /// Note: _trMtx must be held before calling this. std::string _makeHeaderString(bool reallyLast, uint32_t seq, int scsSeq); - /// @see buildDataMsg - /// Note: _trMtx must be held before calling this. - void _buildDataMsg(Task const& task, bool largeResult, util::MultiError& multiErr); - - //////////////////////////////////////////////////// - // Methods used by QueryRunner to build dataMsg - void _buildHeader(bool largeResult); - - /// @see addSchemaCols - /// Note: _trMtx must be held before calling this. - void _addSchemaCols(std::vector& schemaCols); + /// @param lock - on _trMtx must be held before calling this methid. + void _buildHeader(std::lock_guard const& lock); /// @see dump() std::string _dump() const; diff --git a/src/wdb/QueryRunner.h b/src/wdb/QueryRunner.h index f1c7947b5d..ae2cc0e7b5 100644 --- a/src/wdb/QueryRunner.h +++ b/src/wdb/QueryRunner.h @@ -123,8 +123,6 @@ class QueryRunner : public wbase::TaskQueryRunner, public std::enable_shared_fro util::MultiError _multiError; // Error log - bool _largeResult = false; //< True for all transmits after the first transmit. - /// Used to limit the number of open MySQL connections. std::shared_ptr const _sqlConnMgr; std::atomic _runQueryCalled{false}; ///< If runQuery gets called twice, the scheduler messed up. 
From 7e4dd53150e0196333c7b651d8b252dfa71fa10f Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Wed, 1 Mar 2023 07:13:35 +0000 Subject: [PATCH 4/8] Major refactoring and code cleanup in the worker query processing classes Split wbase::SendChannelShared into the base class and an implementation Did the minor refactoring in naming and using XROOTD/SSI streaming channels These changes were required to prepare the ground for introducing other "ChannelShared"-like implementations of the result processing and delivery mechanisms. The refactoring aims at correcting dependencies in the code that creates and sets up worker tasks, so that task initialization happens completely within the implementation file of wbase/Task.cc rather than being spread between wbase/Task.cc and wcontrol/Foreman.cc. Altogether this is meant to improve the observability of the code and make it easier to maintain. Also, some minor code cleanup and removal of unused header includes and forward declarations were made. This should reduce the compilation time of the relevant modules. Got rid of unused members and methods Classes wbase::ChannelShared (used to be wbase::SendChannelShared) and wbase::TransmitData had an obsolete mechanism for setting up result schema in the response messages to be sent to Czar. The current implementation no longer uses that API. The obsolete API was removed to avoid confusion as it wasn't obvious where the actual schema settings were made. Got rid of std:: in the implementation files with "using namespace std". Guaranteed and enforced synchronization in private methods that require a lock to be held before calling the methods. In the new code, a const reference to a lock is passed around contexts (methods) where such a lock is required. This compiler-enforced technique has proven to work for large code bases where it would be hard to track a state of the lock by other methods.
The only other alternative method would be to do the run-time inspection of the lock at the entrance of a method. --- src/wbase/CMakeLists.txt | 1 + src/wbase/ChannelShared.cc | 337 +++++++++++++++++++++++++++++++ src/wbase/ChannelShared.h | 248 +++++++++++++++++++++++ src/wbase/SendChannel.h | 2 +- src/wbase/SendChannelShared.cc | 352 +++------------------------------ src/wbase/SendChannelShared.h | 252 +++-------------------- src/wbase/Task.cc | 45 ++++- src/wbase/Task.h | 28 ++- src/wbase/TransmitData.cc | 104 +++++----- src/wbase/TransmitData.h | 90 ++++----- src/wcontrol/Foreman.cc | 59 ++---- src/wcontrol/Foreman.h | 58 ++++-- src/wcontrol/SqlConnMgr.cc | 10 +- src/wcontrol/SqlConnMgr.h | 6 +- src/wcontrol/WorkerStats.h | 2 +- src/wdb/QueryRunner.cc | 26 +-- src/wdb/QueryRunner.h | 12 +- src/wdb/testQueryRunner.cc | 15 +- src/wsched/testSchedulers.cc | 13 +- src/xrdsvc/SsiRequest.cc | 72 ++++--- src/xrdsvc/SsiRequest.h | 38 ++-- src/xrdsvc/SsiService.cc | 11 +- src/xrdsvc/SsiService.h | 9 +- 23 files changed, 933 insertions(+), 857 deletions(-) create mode 100644 src/wbase/ChannelShared.cc create mode 100644 src/wbase/ChannelShared.h diff --git a/src/wbase/CMakeLists.txt b/src/wbase/CMakeLists.txt index 5fc8bc8de6..01b051a2c9 100644 --- a/src/wbase/CMakeLists.txt +++ b/src/wbase/CMakeLists.txt @@ -3,6 +3,7 @@ add_dependencies(wbase proto) target_sources(wbase PRIVATE Base.cc + ChannelShared.cc SendChannel.cc SendChannelShared.cc Task.cc diff --git a/src/wbase/ChannelShared.cc b/src/wbase/ChannelShared.cc new file mode 100644 index 0000000000..28df74bcf8 --- /dev/null +++ b/src/wbase/ChannelShared.cc @@ -0,0 +1,337 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "wbase/ChannelShared.h" + +// Qserv headers +#include "global/LogContext.h" +#include "proto/ProtoHeaderWrap.h" +#include "qmeta/types.h" +#include "util/Bug.h" +#include "util/Error.h" +#include "wcontrol/TransmitMgr.h" +#include "wbase/Task.h" +#include "wpublish/QueriesAndChunks.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.ChannelShared"); +} + +namespace lsst::qserv::wbase { + +atomic ChannelShared::scsSeqId{0}; + +ChannelShared::ChannelShared(shared_ptr const& sendChannel, + shared_ptr const& transmitMgr, qmeta::CzarId czarId) + : _sendChannel(sendChannel), _transmitMgr(transmitMgr), _czarId(czarId), _scsId(scsSeqId++) { + if (_sendChannel == nullptr) { + throw util::Bug(ERR_LOC, "ChannelShared constructor given nullptr"); + } +} + +ChannelShared::~ChannelShared() { + if (_sendChannel != nullptr) { + _sendChannel->setDestroying(); + if (!_sendChannel->isDead()) { + _sendChannel->kill("~ChannelShared()"); + } + } +} + +bool ChannelShared::send(char const* buf, int bufLen) { + lock_guard const streamMutexLock(_streamMutex); + return _sendChannel->send(buf, bufLen); +} + +bool ChannelShared::sendError(string const& msg, int code) { + lock_guard const streamMutexLock(_streamMutex); + return 
_sendChannel->sendError(msg, code); +} + +bool ChannelShared::sendFile(int fd, wbase::SendChannel::Size fSize) { + lock_guard const streamMutexLock(_streamMutex); + return _sendChannel->sendFile(fd, fSize); +} + +bool ChannelShared::sendStream(xrdsvc::StreamBuffer::Ptr const& sBuf, bool last, int scsSeq) { + lock_guard const streamMutexLock(_streamMutex); + return _sendChannel->sendStream(sBuf, last, scsSeq); +} + +bool ChannelShared::kill(string const& note) { + lock_guard const streamMutexLock(_streamMutex); + return _kill(streamMutexLock, note); +} + +bool ChannelShared::isDead() { + if (_sendChannel == nullptr) return true; + return _sendChannel->isDead(); +} + +void ChannelShared::setTaskCount(int taskCount) { _taskCount = taskCount; } + +bool ChannelShared::transmitTaskLast() { + lock_guard const streamMutexLock(_streamMutex); + ++_lastCount; + bool lastTaskDone = _lastCount >= _taskCount; + return lastTaskDone; +} + +bool ChannelShared::_kill(lock_guard const& streamMutexLock, string const& note) { + LOGS(_log, LOG_LVL_DEBUG, "ChannelShared::kill() called " << note); + bool ret = _sendChannel->kill(note); + _lastRecvd = true; + return ret; +} + +string ChannelShared::makeIdStr(int qId, int jId) { + string str("QID" + (qId == 0 ? "" : to_string(qId) + "#" + to_string(jId))); + return str; +} + +uint64_t ChannelShared::getSeq() const { return _sendChannel->getSeq(); } + +string ChannelShared::dumpTransmit() const { + lock_guard const tMtxLock(tMtx); + return dumpTransmit(tMtxLock); +} + +bool ChannelShared::buildAndTransmitError(util::MultiError& multiErr, Task::Ptr const& task, bool cancelled) { + auto qId = task->getQueryId(); + bool scanInteractive = true; + waitTransmitLock(scanInteractive, qId); + lock_guard const tMtxLock(tMtx); + // Ignore the existing transmitData object as it is irrelevant now + // that there's an error. Create a new one to send the error. 
+ TransmitData::Ptr tData = createTransmit(tMtxLock, *task); + transmitData = tData; + transmitData->buildDataMsg(*task, multiErr); + LOGS(_log, LOG_LVL_DEBUG, "ChannelShared::buildAndTransmitError " << dumpTransmit(tMtxLock)); + bool lastIn = true; + return prepTransmit(tMtxLock, task, cancelled, lastIn); +} + +string ChannelShared::dumpTransmit(lock_guard const& lock) const { + return string("ChannelShared::dumpTransmit ") + + (transmitData == nullptr ? "nullptr" : transmitData->dump()); +} + +void ChannelShared::waitTransmitLock(bool interactive, QueryId const& qId) { + if (_transmitLock != nullptr) return; + { + unique_lock uLock(_transmitLockMtx); + if (_firstTransmitLock.exchange(false)) { + // This will wait until TransmitMgr has resources available. + _transmitLock.reset(new wcontrol::TransmitLock(*_transmitMgr, interactive, qId)); + } else { + _transmitLockCv.wait(uLock, [this]() { return _transmitLock != nullptr; }); + } + } + _transmitLockCv.notify_one(); +} + +void ChannelShared::initTransmit(lock_guard const& tMtxLock, Task& task) { + LOGS(_log, LOG_LVL_TRACE, "initTransmit " << task.getIdStr() << " seq=" << task.getTSeq()); + if (transmitData == nullptr) { + transmitData = createTransmit(tMtxLock, task); + } +} + +TransmitData::Ptr ChannelShared::createTransmit(lock_guard const& tMtxLock, Task& task) { + LOGS(_log, LOG_LVL_TRACE, "createTransmit " << task.getIdStr() << " seq=" << task.getTSeq()); + auto tData = wbase::TransmitData::createTransmitData(_czarId, task.getIdStr()); + tData->initResult(task); + return tData; +} + +bool ChannelShared::prepTransmit(lock_guard const& tMtxLock, Task::Ptr const& task, bool cancelled, + bool lastIn) { + auto qId = task->getQueryId(); + int jId = task->getJobId(); + + QSERV_LOGCONTEXT_QUERY_JOB(qId, jId); + LOGS(_log, LOG_LVL_DEBUG, "_transmit lastIn=" << lastIn); + if (isDead()) { + LOGS(_log, LOG_LVL_INFO, "aborting transmit since sendChannel is dead."); + return false; + } + + // Have all rows already been 
read, or an error? + bool erred = transmitData->hasErrormsg(); + + bool success = addTransmit(tMtxLock, task, cancelled, erred, lastIn, transmitData, qId, jId); + + // Now that transmitData is on the queue, reset and initialize a new one. + transmitData.reset(); + initTransmit(tMtxLock, *task); // reset transmitData + + return success; +} + +bool ChannelShared::addTransmit(lock_guard const& tMtxLock, Task::Ptr const& task, bool cancelled, + bool erred, bool lastIn, TransmitData::Ptr const& tData, int qId, int jId) { + QSERV_LOGCONTEXT_QUERY_JOB(qId, jId); + assert(tData != nullptr); + + // This lock may be held for a very long time. + lock_guard const queueMtxLock(_queueMtx); + _transmitQueue.push(tData); + + // If _lastRecvd is true, the last message has already been transmitted and + // this SendChannel is effectively dead. + bool reallyLast = _lastRecvd; + string idStr(makeIdStr(qId, jId)); + if (_icPtr == nullptr) { + _icPtr = make_shared(to_string(qId) + "_SCS_LDB"); + } + + // If something bad already happened, just give up. + if (reallyLast || isDead()) { + // If there's been some kind of error, make sure that nothing hangs waiting + // for this. + LOGS(_log, LOG_LVL_WARN, "addTransmit getting messages after isDead or reallyLast " << idStr); + _lastRecvd = true; + return false; + } + + // If lastIn is true, all tasks for this job have run to completion and + // finished building their transmit messages. + if (lastIn) { + reallyLast = true; + } + if (reallyLast || erred || cancelled) { + _lastRecvd = true; + LOGS(_log, LOG_LVL_DEBUG, + "addTransmit lastRecvd=" << _lastRecvd << " really=" << reallyLast << " erred=" << erred + << " cancelled=" << cancelled); + } + + return _transmit(tMtxLock, queueMtxLock, erred, task); +} + +bool ChannelShared::_transmit(lock_guard const& tMtxLock, lock_guard const& queueMtxLock, + bool erred, Task::Ptr const& task) { + string idStr = "QID?"; + + // Result data is transmitted in messages containing data and headers. 
+ // data - is the result data + // header - contains information about the next chunk of result data, + // most importantly the size of the next data message. + // The header has a fixed size (about 255 bytes) + // header_END - indicates there will be no more msg. + // msg - contains data and header. + // metadata - special xrootd buffer that can only be set once per ChannelShared + // instance. It is used to send the first header. + // A complete set of results to the czar looks like + // metadata[header_A] -> msg_A[data_A, header_END] + // or + // metadata[header_A] -> msg_A[data_A, header_B] + // -> msg_B[data_B, header_C] -> ... -> msg_X[data_x, header_END] + // + // Since you can't send msg_A until you know the size of data_B, you can't + // transmit until there are at least 2 msg in the queue, or you know + // that msg_A is the last msg in the queue. + // Note that the order of result rows does not matter, but data_B must come after header_B. + // Keep looping until nothing more can be transmitted. + while (_transmitQueue.size() >= 2 || _lastRecvd) { + TransmitData::Ptr thisTransmit = _transmitQueue.front(); + _transmitQueue.pop(); + if (thisTransmit == nullptr) { + throw util::Bug(ERR_LOC, "_transmitLoop() _transmitQueue had nullptr!"); + } + + auto sz = _transmitQueue.size(); + // Is this really the last message for this SharedSendChannel? 
+ bool reallyLast = (_lastRecvd && sz == 0); + + TransmitData::Ptr nextTr; + if (sz != 0) { + nextTr = _transmitQueue.front(); + if (nextTr->getResultSize() == 0) { + LOGS(_log, LOG_LVL_ERROR, + "RESULT SIZE IS 0, this should not happen thisTr=" << thisTransmit->dump() + << " nextTr=" << nextTr->dump()); + } + } + uint32_t seq = _sendChannel->getSeq(); + int scsSeq = ++_scsSeq; + string seqStr = string("seq=" + to_string(seq) + " scsseq=" + to_string(scsSeq) + + " scsId=" + to_string(_scsId)); + thisTransmit->attachNextHeader(nextTr, reallyLast, seq, scsSeq); + + // The first message needs to put its header data in metadata as there's + // no previous message it could attach its header to. + { + lock_guard const streamMutexLock(_streamMutex); // Must keep meta and buffer together. + if (_firstTransmit.exchange(false)) { + // Put the header for the first message in metadata + // _metaDataBuf must remain valid until Finished() is called. + string thisHeaderString = thisTransmit->getHeaderString(seq, scsSeq - 1); + _metadataBuf = proto::ProtoHeaderWrap::wrap(thisHeaderString); + bool metaSet = _sendChannel->setMetadata(_metadataBuf.data(), _metadataBuf.size()); + if (!metaSet) { + LOGS(_log, LOG_LVL_ERROR, "Failed to setMeta " << idStr); + _kill(streamMutexLock, "metadata"); + return false; + } + } + + // Put the data for the transmit in a StreamBuffer and send it. + // Since the StreamBuffer's lifetime is beyond our control, it needs + // its own Task pointer. + auto streamBuf = thisTransmit->getStreamBuffer(task); + streamBuf->startTimer(); + bool sent = _sendBuf(tMtxLock, queueMtxLock, streamMutexLock, streamBuf, reallyLast, + "transmitLoop " + idStr + " " + seqStr, scsSeq); + + if (!sent) { + LOGS(_log, LOG_LVL_ERROR, "Failed to send " << idStr); + _kill(streamMutexLock, "ChannelShared::_transmit b"); + return false; + } + } + // If that was the last message, break the loop. 
+ if (reallyLast) return true; + } + return true; +} + +bool ChannelShared::_sendBuf(lock_guard const& tMtxLock, lock_guard const& queueMtxLock, + lock_guard const& streamMutexLock, xrdsvc::StreamBuffer::Ptr& streamBuf, + bool last, string const& note, int scsSeq) { + bool sent = _sendChannel->sendStream(streamBuf, last, scsSeq); + if (!sent) { + LOGS(_log, LOG_LVL_ERROR, "Failed to transmit " << note << "!"); + return false; + } else { + LOGS(_log, LOG_LVL_INFO, "_sendbuf wait start " << note); + streamBuf->waitForDoneWithThis(); // Block until this buffer has been sent. + } + return sent; +} + +} // namespace lsst::qserv::wbase diff --git a/src/wbase/ChannelShared.h b/src/wbase/ChannelShared.h new file mode 100644 index 0000000000..81c17483e7 --- /dev/null +++ b/src/wbase/ChannelShared.h @@ -0,0 +1,248 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +#ifndef LSST_QSERV_WBASE_CHANNELSHARED_H +#define LSST_QSERV_WBASE_CHANNELSHARED_H + +// System headers +#include +#include +#include +#include +#include +#include + +// Third-party headers +#include + +// Qserv headers +#include "qmeta/types.h" +#include "util/InstanceCount.h" +#include "wbase/SendChannel.h" +#include "wbase/TransmitData.h" + +namespace lsst::qserv::wcontrol { +class TransmitLock; +class TransmitMgr; +} // namespace lsst::qserv::wcontrol + +namespace lsst::qserv::wbase { +class Task; +} + +namespace lsst::qserv::util { +class MultiError; +} + +namespace lsst::qserv::wbase { + +/// The base class for a family of the shared channels +class ChannelShared { +public: + using Ptr = std::shared_ptr; + + static std::atomic scsSeqId; ///< Source for unique _scsId numbers + + ChannelShared() = delete; + ChannelShared(ChannelShared const&) = delete; + ChannelShared& operator=(ChannelShared const&) = delete; + + /// Non-trival d-tor is needed to close the channel. + virtual ~ChannelShared(); + + /// Wrappers for wbase::SendChannel public functions that may need to be used + /// by threads. + /// @see wbase::SendChannel::send + bool send(char const* buf, int bufLen); + + /// @see wbase::SendChannel::sendError + bool sendError(std::string const& msg, int code); + + /// @see wbase::SendChannel::sendFile + bool sendFile(int fd, wbase::SendChannel::Size fSize); + + /// @see wbase::SendChannel::sendStream + bool sendStream(xrdsvc::StreamBuffer::Ptr const& sBuf, bool last, int scsSeq = -1); + + /// @see wbase::SendChannel::kill + bool kill(std::string const& note); + + /// @see wbase::SendChannel::isDead + bool isDead(); + + /// Set the number of Tasks that will be sent using this wbase::SendChannel. + /// This should not be changed once set. + void setTaskCount(int taskCount); + int getTaskCount() const { return _taskCount; } + + /// @return true if this is the last task to call this + bool transmitTaskLast(); + + /// Return a normalized id string. 
+ static std::string makeIdStr(int qId, int jId); + + /// @return the channel sequence number (this will not be valid until after + /// the channel is open.) + uint64_t getSeq() const; + + /// @return the sendChannelShared sequence number, which is always valid. + uint64_t getScsId() const { return _scsId; } + + /// @return the current sql connection count + int getSqlConnectionCount() { return _sqlConnectionCount; } + + /// @return the sql connection count after incrementing by 1. + int incrSqlConnectionCount() { return ++_sqlConnectionCount; } + + /// @return true if this is the first time this function has been called. + bool getFirstChannelSqlConn() { return _firstChannelSqlConn.exchange(false); } + + /// @return a transmit data object indicating the errors in 'multiErr'. + bool buildAndTransmitError(util::MultiError& multiErr, std::shared_ptr const& task, bool cancelled); + + /// Put the SQL results in a TransmitData object and transmit it to the czar + /// if appropriate. + /// @return true if there was an error. + virtual bool buildAndTransmitResult(MYSQL_RES* mResult, std::shared_ptr const& task, + util::MultiError& multiErr, std::atomic& cancelled) = 0; + + /// @return a log worthy string describing transmitData. + std::string dumpTransmit() const; + +protected: + /// Protected constructor is seen by subclasses only. + ChannelShared(std::shared_ptr const& sendChannel, + std::shared_ptr const& transmitMgr, qmeta::CzarId czarId); + + std::shared_ptr const sendChannel() const { return _sendChannel; } + + /// Dumps transmitData into a string within the thread-safe context. + /// @param tMtxLock - Lock on mutex tMtx to be acquired before calling the method. + std::string dumpTransmit(std::lock_guard const& tMtxLock) const; + + /// @return a new TransmitData::Ptr object. + /// @param tMtxLock - Lock on mutex tMtx to be acquired before calling the method. 
+ TransmitData::Ptr createTransmit(std::lock_guard const& tMtxLock, Task& task); + + /// Create a new transmitData object if needed. + /// @param tMtxLock - Lock on mutex tMtx to be acquired before calling the method. + void initTransmit(std::lock_guard const& tMtxLock, Task& task); + + /// Prepare the transmit data and then call addTransmit. + /// @param tMtxLock - Lock on mutex tMtx to be acquired before calling the method. + virtual bool prepTransmit(std::lock_guard const& tMtxLock, std::shared_ptr const& task, + bool cancelled, bool lastIn); + + /// Try to transmit the data in tData. + /// If the queue already has at least 2 TransmitData objects, addTransmit + /// may wait before returning. Result rows are read from the + /// database until there are no more rows or the buffer is + /// sufficiently full. addTransmit waits until that buffer has been + /// sent to the czar before reading more rows. Without the wait, + /// the worker may read in too many result rows, run out of memory, + /// and crash. + /// @param tMtxLock - Lock on mutex tMtx to be acquired before calling the method. + /// @return true if transmit was added successfully. + /// @see ChannelShared::_transmit code for further explanation. + bool addTransmit(std::lock_guard const& tMtxLock, std::shared_ptr const& task, + bool cancelled, bool erred, bool lastIn, TransmitData::Ptr const& tData, int qId, + int jId); + + /// Items to share one TransmitLock across all Task's using this + /// ChannelShared. If all Task's using this channel are not + /// allowed to complete, deadlock is likely. + void waitTransmitLock(bool interactive, QueryId const& qId); + + std::shared_ptr transmitData; ///< TransmitData object + mutable std::mutex tMtx; ///< protects transmitData + +private: + /// @see wbase::SendChannel::kill + /// @param streamMutexLock - Lock on mutex _streamMutex to be acquired before calling the method. 
+ bool _kill(std::lock_guard const& streamMutexLock, std::string const& note); + + /// Encode TransmitData items from _transmitQueue and pass them to XrdSsi + /// to be sent to the czar. + /// The header for the 'nextTransmit' item is appended to the result of + /// 'thisTransmit', with a specially constructed header appended for the + /// 'reallyLast' transmit. + /// The specially constructed header for the 'reallyLast' transmit just + /// says that there's no more data, this wbase::SendChannel is done. + /// @param tMtxLock - Lock on mutex tMtx to be acquired before calling the method. + /// @param queueMtxLock - Lock on mutex _queueMtx to be acquired before calling the method. + bool _transmit(std::lock_guard const& tMtxLock, + std::lock_guard const& queueMtxLock, bool erred, + std::shared_ptr const& task); + + /// Send the buffer 'streamBuffer' using xrdssi. + /// 'last' should only be true if this is the last buffer to be sent with this _sendChannel. + /// 'note' is just a log note about what/who is sending the buffer. + /// @param tMtxLock - Lock on mutex tMtx to be acquired before calling the method. + /// @param queueMtxLock - Lock on mutex _queueMtx to be acquired before calling the method. + /// @param streamMutexLock - Lock on mutex _streamMutex to be acquired before calling the method. + /// @return true if the buffer was sent. + bool _sendBuf(std::lock_guard const& tMtxLock, + std::lock_guard const& queueMtxLock, + std::lock_guard const& streamMutexLock, xrdsvc::StreamBuffer::Ptr& streamBuf, + bool last, std::string const& note, int scsSeq); + + std::atomic _firstTransmitLock{true}; ///< True until the first thread tries to lock transmitLock. + std::shared_ptr _transmitLock; ///< Hold onto transmitLock until finished. + std::mutex _transmitLockMtx; ///< protects access to _transmitLock. + std::condition_variable _transmitLockCv; + + std::shared_ptr const _sendChannel; ///< Used to pass encoded information to XrdSsi. 
+ + std::shared_ptr _transmitMgr; ///< Pointer to the TransmitMgr + + /// streamMutex is used to protect _lastCount and messages that are sent + /// using ChannelShared. + std::mutex _streamMutex; + + std::queue _transmitQueue; ///< Queue of data to be encoded and sent. + std::mutex _queueMtx; ///< protects _transmitQueue + + /// metadata buffer. Once set, it cannot change until after Finish() has been called. + std::string _metadataBuf; + + int _taskCount = 0; ///< The number of tasks to be sent over this wbase::SendChannel. + int _lastCount = 0; ///< The number of 'last' buffers received. + std::atomic _lastRecvd{false}; ///< The truly 'last' transmit message is in the queue. + std::atomic _firstTransmit{true}; ///< True until the first transmit has been sent. + + qmeta::CzarId const _czarId; ///< id of the czar that requested this task(s). + uint64_t const _scsId; ///< id number for this ChannelShared + std::atomic _scsSeq{0}; ///< ChannelSharedsequence number for transmit. + + /// The number of sql connections opened to handle the Tasks using this ChannelShared. + /// Once this is greater than 0, this object needs free access to sql connections to avoid + // system deadlock. @see SqlConnMgr::_take() and SqlConnMgr::_release(). + std::atomic _sqlConnectionCount{0}; + + /// true until getFirstChannelSqlConn() is called. + std::atomic _firstChannelSqlConn{true}; + + std::shared_ptr _icPtr; ///< temporary for LockupDB +}; + +} // namespace lsst::qserv::wbase + +#endif // LSST_QSERV_WBASE_CHANNELSHARED_H diff --git a/src/wbase/SendChannel.h b/src/wbase/SendChannel.h index 876c713952..573cba00e3 100644 --- a/src/wbase/SendChannel.h +++ b/src/wbase/SendChannel.h @@ -63,7 +63,7 @@ class SendChannel { /// Send a bucket of bytes. /// @param last true if no more sendStream calls will be invoked. - /// @param scsSeq - is the SendChannelShared sequence number, if there is one. + /// @param scsSeq - is the ChannelShared sequence number, if there is one. 
virtual bool sendStream(xrdsvc::StreamBuffer::Ptr const& sBuf, bool last, int scsSeq = -1); /// diff --git a/src/wbase/SendChannelShared.cc b/src/wbase/SendChannelShared.cc index 1f90da5888..d11e12c6bb 100644 --- a/src/wbase/SendChannelShared.cc +++ b/src/wbase/SendChannelShared.cc @@ -22,19 +22,12 @@ // Class header #include "wbase/SendChannelShared.h" -// System headers - // Qserv headers -#include "global/LogContext.h" #include "proto/ProtoHeaderWrap.h" -#include "util/Bug.h" -#include "util/Error.h" -#include "util/HoldTrack.h" -#include "util/MultiError.h" -#include "util/Timer.h" #include "wbase/Task.h" -#include "wcontrol/TransmitMgr.h" #include "wpublish/QueriesAndChunks.h" +#include "util/MultiError.h" +#include "util/Timer.h" // LSST headers #include "lsst/log/Log.h" @@ -47,248 +40,19 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.SendChannelShared"); namespace lsst::qserv::wbase { -atomic SendChannelShared::scsSeqId{0}; - -SendChannelShared::Ptr SendChannelShared::create(SendChannel::Ptr const& sendChannel, - wcontrol::TransmitMgr::Ptr const& transmitMgr, +SendChannelShared::Ptr SendChannelShared::create(shared_ptr const& sendChannel, + shared_ptr const& transmitMgr, qmeta::CzarId czarId) { - auto scs = shared_ptr(new SendChannelShared(sendChannel, transmitMgr, czarId)); - return scs; + return shared_ptr(new SendChannelShared(sendChannel, transmitMgr, czarId)); } -SendChannelShared::SendChannelShared(SendChannel::Ptr const& sendChannel, - std::shared_ptr const& transmitMgr, +SendChannelShared::SendChannelShared(shared_ptr const& sendChannel, + shared_ptr const& transmitMgr, qmeta::CzarId czarId) - : _sendChannel(sendChannel), _transmitMgr(transmitMgr), _czarId(czarId), _scsId(scsSeqId++) { - if (_sendChannel == nullptr) { - throw util::Bug(ERR_LOC, "SendChannelShared constructor given nullptr"); - } -} - -SendChannelShared::~SendChannelShared() { - if (_sendChannel != nullptr) { - _sendChannel->setDestroying(); - if (!_sendChannel->isDead()) { - 
_sendChannel->kill("~SendChannelShared()"); - } - } -} - -void SendChannelShared::setTaskCount(int taskCount) { _taskCount = taskCount; } - -bool SendChannelShared::transmitTaskLast(bool inLast) { - lock_guard streamLock(_streamMutex); - /// _caller must have locked _streamMutex before calling this. - if (not inLast) return false; // This wasn't the last message buffer for this task, so it doesn't matter. - ++_lastCount; - bool lastTaskDone = _lastCount >= _taskCount; - return lastTaskDone; -} - -bool SendChannelShared::_kill(std::string const& note) { - LOGS(_log, LOG_LVL_DEBUG, "SendChannelShared::kill() called " << note); - bool ret = _sendChannel->kill(note); - _lastRecvd = true; - return ret; -} - -string SendChannelShared::makeIdStr(int qId, int jId) { - string str("QID" + (qId == 0 ? "" : to_string(qId) + "#" + to_string(jId))); - return str; -} - -void SendChannelShared::_waitTransmitLock(bool interactive, QueryId const& qId) { - if (_transmitLock != nullptr) { - return; - } - - { - unique_lock uLock(_transmitLockMtx); - bool first = _firstTransmitLock.exchange(false); - if (first) { - // This will wait until TransmitMgr has resources available. - _transmitLock.reset(new wcontrol::TransmitLock(*_transmitMgr, interactive, qId)); - } else { - _transmitLockCv.wait(uLock, [this]() { return _transmitLock != nullptr; }); - } - } - _transmitLockCv.notify_one(); -} - -bool SendChannelShared::_addTransmit(Task::Ptr const& task, bool cancelled, bool erred, bool lastIn, - TransmitData::Ptr const& tData, int qId, int jId) { - QSERV_LOGCONTEXT_QUERY_JOB(qId, jId); - assert(tData != nullptr); - - // This lock may be held for a very long time. - std::unique_lock qLock(_queueMtx); - _transmitQueue.push(tData); - - // If _lastRecvd is true, the last message has already been transmitted and - // this SendChannel is effectively dead. 
- bool reallyLast = _lastRecvd; - string idStr(makeIdStr(qId, jId)); - if (_icPtr == nullptr) { - _icPtr = std::make_shared(std::to_string(qId) + "_SCS_LDB"); - } - - // If something bad already happened, just give up. - if (reallyLast || isDead()) { - // If there's been some kind of error, make sure that nothing hangs waiting - // for this. - LOGS(_log, LOG_LVL_WARN, "addTransmit getting messages after isDead or reallyLast " << idStr); - _lastRecvd = true; - return false; - } - - // If lastIn is true, all tasks for this job have run to completion and - // finished building their transmit messages. - if (lastIn) { - reallyLast = true; - } - if (reallyLast || erred || cancelled) { - _lastRecvd = true; - LOGS(_log, LOG_LVL_DEBUG, - "addTransmit lastRecvd=" << _lastRecvd << " really=" << reallyLast << " erred=" << erred - << " cancelled=" << cancelled); - } - - return _transmit(erred, task); -} - -bool SendChannelShared::_transmit(bool erred, Task::Ptr const& task) { - string idStr = "QID?"; - - // Result data is transmitted in messages containing data and headers. - // data - is the result data - // header - contains information about the next chunk of result data, - // most importantly the size of the next data message. - // The header has a fixed size (about 255 bytes) - // header_END - indicates there will be no more msg. - // msg - contains data and header. - // metadata - special xrootd buffer that can only be set once per SendChannelShared - // instance. It is used to send the first header. - // A complete set of results to the czar looks like - // metadata[header_A] -> msg_A[data_A, header_END] - // or - // metadata[header_A] -> msg_A[data_A, header_B] - // -> msg_B[data_B, header_C] -> ... -> msg_X[data_x, header_END] - // - // Since you can't send msg_A until you know the size of data_B, you can't - // transmit until there are at least 2 msg in the queue, or you know - // that msg_A is the last msg in the queue. 
- // Note that the order of result rows does not matter, but data_B must come after header_B. - // Keep looping until nothing more can be transmitted. - while (_transmitQueue.size() >= 2 || _lastRecvd) { - TransmitData::Ptr thisTransmit = _transmitQueue.front(); - _transmitQueue.pop(); - if (thisTransmit == nullptr) { - throw util::Bug(ERR_LOC, "_transmitLoop() _transmitQueue had nullptr!"); - } - - auto sz = _transmitQueue.size(); - // Is this really the last message for this SharedSendChannel? - bool reallyLast = (_lastRecvd && sz == 0); - - TransmitData::Ptr nextTr; - if (sz != 0) { - nextTr = _transmitQueue.front(); - if (nextTr->getResultSize() == 0) { - LOGS(_log, LOG_LVL_ERROR, - "RESULT SIZE IS 0, this should not happen thisTr=" << thisTransmit->dump() - << " nextTr=" << nextTr->dump()); - } - } - uint32_t seq = _sendChannel->getSeq(); - int scsSeq = ++_scsSeq; - string seqStr = string("seq=" + to_string(seq) + " scsseq=" + to_string(scsSeq) + - " scsId=" + to_string(_scsId)); - thisTransmit->attachNextHeader(nextTr, reallyLast, seq, scsSeq); + : ChannelShared(sendChannel, transmitMgr, czarId) {} - // The first message needs to put its header data in metadata as there's - // no previous message it could attach its header to. - { - lock_guard streamLock(_streamMutex); // Must keep meta and buffer together. - if (_firstTransmit.exchange(false)) { - // Put the header for the first message in metadata - // _metaDataBuf must remain valid until Finished() is called. - std::string thisHeaderString = thisTransmit->getHeaderString(seq, scsSeq - 1); - _metadataBuf = proto::ProtoHeaderWrap::wrap(thisHeaderString); - bool metaSet = _sendChannel->setMetadata(_metadataBuf.data(), _metadataBuf.size()); - if (!metaSet) { - LOGS(_log, LOG_LVL_ERROR, "Failed to setMeta " << idStr); - _kill("metadata"); - return false; - } - } - - // Put the data for the transmit in a StreamBuffer and send it. 
- // Since the StreamBuffer's lifetime is beyond our control, it needs - // its own Task pointer. - auto streamBuf = thisTransmit->getStreamBuffer(task); - streamBuf->startTimer(); - bool sent = _sendBuf(streamLock, streamBuf, reallyLast, "transmitLoop " + idStr + " " + seqStr, - scsSeq); - - if (!sent) { - LOGS(_log, LOG_LVL_ERROR, "Failed to send " << idStr); - _kill("SendChannelShared::_transmit b"); - return false; - } - } - // If that was the last message, break the loop. - if (reallyLast) return true; - } - return true; -} - -bool SendChannelShared::_sendBuf(lock_guard const& streamLock, xrdsvc::StreamBuffer::Ptr& streamBuf, - bool last, string const& note, int scsSeq) { - bool sent = _sendChannel->sendStream(streamBuf, last, scsSeq); - if (!sent) { - LOGS(_log, LOG_LVL_ERROR, "Failed to transmit " << note << "!"); - return false; - } else { - LOGS(_log, LOG_LVL_INFO, "_sendbuf wait start " << note); - streamBuf->waitForDoneWithThis(); // Block until this buffer has been sent. - } - return sent; -} - -bool SendChannelShared::buildAndTransmitError(util::MultiError& multiErr, Task::Ptr const& task, - bool cancelled) { - util::HoldTrack::Mark mark(ERR_LOC, "SCS buildAndTransmitError"); - auto qId = task->getQueryId(); - bool scanInteractive = true; - _waitTransmitLock(scanInteractive, qId); - lock_guard lock(_tMtx); - // Ignore the existing _transmitData object as it is irrelevant now - // that there's an error. Create a new one to send the error. - TransmitData::Ptr tData = _createTransmit(*task); - _transmitData = tData; - bool largeResult = false; - _transmitData->buildDataMsg(*task, largeResult, multiErr); - LOGS(_log, LOG_LVL_DEBUG, "SendChannelShared::buildAndTransmitError " << _dumpTr()); - bool lastIn = true; - return _prepTransmit(task, cancelled, lastIn); -} - -void SendChannelShared::setSchemaCols(Task& task, std::vector& schemaCols) { - // _schemaCols should be identical for all tasks in this send channel. 
- if (_schemaColsSet.exchange(true) == false) { - _schemaCols = schemaCols; - // If this is the first time _schemaCols has been set, it is missing - // from the existing _transmitData object - lock_guard lock(_tMtx); - if (_transmitData != nullptr) { - _transmitData->addSchemaCols(_schemaCols); - } - } -} - -bool SendChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, int numFields, Task::Ptr const& task, - bool largeResult, util::MultiError& multiErr, - std::atomic& cancelled, bool& readRowsOk) { +bool SendChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, Task::Ptr const& task, + util::MultiError& multiErr, atomic& cancelled) { util::Timer transmitT; transmitT.start(); double bufferFillSecs = 0.0; @@ -298,69 +62,58 @@ bool SendChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, int numFields // Wait until the transmit Manager says it is ok to send data to the czar. auto qId = task->getQueryId(); bool scanInteractive = task->getScanInteractive(); - _waitTransmitLock(scanInteractive, qId); + waitTransmitLock(scanInteractive, qId); // Lock the transmit mutex until this is done. - lock_guard lock(_tMtx); - // Initialize _transmitData, if needed. - _initTransmit(*task); + lock_guard const tMtxLock(tMtx); + + // Initialize transmitData, if needed. + initTransmit(tMtxLock, *task); - numFields = mysql_num_fields(mResult); bool erred = false; size_t tSize = 0; int bytesTransmitted = 0; int rowsTransmitted = 0; - // If fillRows returns false, _transmitData is full and needs to be transmitted + // If fillRows returns false, transmitData is full and needs to be transmitted // fillRows returns true when there are no more rows in mResult to add. // tSize is set by fillRows. 
bool more = true; while (more && !cancelled) { util::Timer bufferFillT; bufferFillT.start(); - more = !_transmitData->fillRows(mResult, numFields, tSize); - if (tSize > proto::ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT) { - LOGS_ERROR("Message single row too large to send using protobuffer"); - erred = true; - util::Error worker_err(util::ErrorCode::INTERNAL, - "Message single row too large to send using protobuffer"); - multiErr.push_back(worker_err); - break; - } - bytesTransmitted += _transmitData->getResultSize(); - rowsTransmitted += _transmitData->getResultRowCount(); - _transmitData->buildDataMsg(*task, largeResult, multiErr); + more = !transmitData->fillRows(mResult, tSize); + bytesTransmitted += transmitData->getResultSize(); + rowsTransmitted += transmitData->getResultRowCount(); + transmitData->buildDataMsg(*task, multiErr); bufferFillT.stop(); bufferFillSecs += bufferFillT.getElapsed(); LOGS(_log, LOG_LVL_TRACE, "buildAndTransmitResult() more=" << more << " " << task->getIdStr() << " seq=" << task->getTSeq() - << _dumpTr()); + << dumpTransmit(tMtxLock)); // This will become true only if this is the last task sending its last transmit. - // _prepTransmit will add the message to the queue and may try to transmit it now. + // prepTransmit will add the message to the queue and may try to transmit it now. bool lastIn = false; if (more) { - if (readRowsOk && !_prepTransmit(task, cancelled, lastIn)) { + if (!prepTransmit(tMtxLock, task, cancelled, lastIn)) { LOGS(_log, LOG_LVL_ERROR, "Could not transmit intermediate results."); - readRowsOk = false; // Empty the fillRows data and then return false. erred = true; break; } } else { - lastIn = transmitTaskLast(true); + lastIn = transmitTaskLast(); // If 'lastIn', this is the last transmit and it needs to be added. - // Otherwise, just append the next query result rows to the existing _transmitData + // Otherwise, just append the next query result rows to the existing transmitData // and send it later. 
- if (lastIn && readRowsOk && !_prepTransmit(task, cancelled, lastIn)) { + if (lastIn && !prepTransmit(tMtxLock, task, cancelled, lastIn)) { LOGS(_log, LOG_LVL_ERROR, "Could not transmit intermediate results."); - readRowsOk = false; // Empty the fillRows data and then return false. erred = true; break; } } } - transmitT.stop(); double timeSeconds = transmitT.getElapsed(); auto qStats = task->getQueryStats(); @@ -371,60 +124,7 @@ bool SendChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, int numFields LOGS(_log, LOG_LVL_TRACE, "TaskTransmit time=" << timeSeconds << " bufferFillSecs=" << bufferFillSecs); } - return erred; } -void SendChannelShared::_initTransmit(Task& task) { - LOGS(_log, LOG_LVL_TRACE, "_initTransmit " << task.getIdStr() << " seq=" << task.getTSeq()); - if (_transmitData == nullptr) { - _transmitData = _createTransmit(task); - } -} - -TransmitData::Ptr SendChannelShared::_createTransmit(Task& task) { - LOGS(_log, LOG_LVL_TRACE, "_createTransmit " << task.getIdStr() << " seq=" << task.getTSeq()); - auto tData = wbase::TransmitData::createTransmitData(_czarId, task.getIdStr()); - tData->initResult(task, _schemaCols); - return tData; -} - -bool SendChannelShared::_prepTransmit(Task::Ptr const& task, bool cancelled, bool lastIn) { - auto qId = task->getQueryId(); - int jId = task->getJobId(); - - QSERV_LOGCONTEXT_QUERY_JOB(qId, jId); - LOGS(_log, LOG_LVL_DEBUG, "_transmit lastIn=" << lastIn); - if (isDead()) { - LOGS(_log, LOG_LVL_INFO, "aborting transmit since sendChannel is dead."); - return false; - } - - // Have all rows already been read, or an error? - bool erred = _transmitData->hasErrormsg(); - - bool success = _addTransmit(task, cancelled, erred, lastIn, _transmitData, qId, jId); - - // Now that _transmitData is on the queue, reset and initialize a new one. 
- _transmitData.reset(); - _initTransmit(*task); // reset _transmitData - - return success; -} - -string SendChannelShared::dumpTr() const { - lock_guard lock(_tMtx); - return _dumpTr(); -} - -string SendChannelShared::_dumpTr() const { - string str = "scs::dumpTr "; - if (_transmitData == nullptr) { - str += "nullptr"; - } else { - str += _transmitData->dump(); - } - return str; -} - } // namespace lsst::qserv::wbase diff --git a/src/wbase/SendChannelShared.h b/src/wbase/SendChannelShared.h index bd1392427f..47a531c55a 100644 --- a/src/wbase/SendChannelShared.h +++ b/src/wbase/SendChannelShared.h @@ -23,31 +23,28 @@ #define LSST_QSERV_WBASE_SENDCHANNELSHARED_H // System headers -#include +#include #include -#include -#include -#include -#include - -// Third-party headers -#include +#include // Qserv headers -#include "util/InstanceCount.h" -#include "wbase/SendChannel.h" -#include "wbase/TransmitData.h" +#include "qmeta/types.h" +#include "wbase/ChannelShared.h" -namespace lsst::qserv { +namespace lsst::qserv::wbase { +class SendChannel; +class Task; +} // namespace lsst::qserv::wbase -namespace wcontrol { +namespace lsst::qserv::wcontrol { class TransmitMgr; -class TransmitLock; -} // namespace wcontrol +} -namespace wbase { +namespace lsst::qserv::util { +class MultiError; +} -class Task; +namespace lsst::qserv::wbase { /// A class that provides a SendChannel object with synchronization so it can be /// shared by across multiple threads. @@ -74,7 +71,7 @@ class Task; /// '_lastRecvd' is also set to true when an error message is sent. When /// there's an error, the czar will throw out all data related to the /// chunk, since it is unreliable. The error needs to be sent immediately to -/// waste as little time processing useless results as possible. +/// waste as little time processing useless results as possible /// /// Cancellation is tricky, it's easy to introduce race conditions that would /// result in deadlock. 
It should work correctly given the following: @@ -84,230 +81,27 @@ class Task; /// - buildAndTransmitError() error must be allowed to attempt to transmit /// even if the Task has been cancelled. This prevents other Tasks getting /// wedged waiting for data to be queued. -class SendChannelShared { +class SendChannelShared : public ChannelShared { public: using Ptr = std::shared_ptr; - static std::atomic scsSeqId; ///< Source for unique _scsId numbers + static Ptr create(std::shared_ptr const& sendChannel, + std::shared_ptr const& transmitMgr, qmeta::CzarId czarId); SendChannelShared() = delete; SendChannelShared(SendChannelShared const&) = delete; SendChannelShared& operator=(SendChannelShared const&) = delete; + virtual ~SendChannelShared() override = default; - static Ptr create(SendChannel::Ptr const& sendChannel, - std::shared_ptr const& transmitMgr, qmeta::CzarId czarId); - - ~SendChannelShared(); - - /// Wrappers for SendChannel public functions that may need to be used - /// by threads. 
- /// @see SendChannel::send - bool send(char const* buf, int bufLen) { - std::lock_guard sLock(_streamMutex); - return _send(buf, bufLen); - } - - /// @see SendChannel::sendError - bool sendError(std::string const& msg, int code) { - std::lock_guard sLock(_streamMutex); - return _sendError(msg, code); - } - - /// @see SendChannel::sendFile - bool sendFile(int fd, SendChannel::Size fSize) { - std::lock_guard sLock(_streamMutex); - return _sendFile(fd, fSize); - } - - /// @see SendChannel::sendStream - bool sendStream(xrdsvc::StreamBuffer::Ptr const& sBuf, bool last, int scsSeq = -1) { - std::lock_guard sLock(_streamMutex); - return _sendStream(sBuf, last, scsSeq); - } - - /// @see SendChannel::kill - bool kill(std::string const& note) { - std::lock_guard sLock(_streamMutex); - return _kill(note); - } - - /// @see SendChannel::isDead - bool isDead() { - if (_sendChannel == nullptr) return true; - return _sendChannel->isDead(); - } - - /// Set the number of Tasks that will be sent using this SendChannel. - /// This should not be changed once set. - void setTaskCount(int taskCount); - int getTaskCount() const { return _taskCount; } - - /// @return true if inLast is true and this is the last task to call this - /// with inLast == true. - /// The calling Thread must hold 'streamMutex' before calling this. - bool transmitTaskLast(bool inLast); - - /// Return a normalized id string. - static std::string makeIdStr(int qId, int jId); - - /// @return the channel sequence number (this will not be valid until after - /// the channel is open.) - uint64_t getSeq() const { return _sendChannel->getSeq(); } - - /// @return the sendChannelShared sequence number, which is always valid. - uint64_t getScsId() const { return _scsId; } - - /// @return the current sql connection count - int getSqlConnectionCount() { return _sqlConnectionCount; } - - /// @return the sql connection count after incrementing by 1. 
- int incrSqlConnectionCount() { return ++_sqlConnectionCount; } - - /// @return true if this is the first time this function has been called. - bool getFirstChannelSqlConn() { return _firstChannelSqlConn.exchange(false); } - - /// Set the schemaCols. All tasks using this send channel should have - /// the same schema. - void setSchemaCols(Task& task, std::vector& schemaCols); - - /// Transmit data object indicating the errors in 'multiErr'. - /// @return true if the error is transmitted. - /// Errors transmissions are attempted even if cancelled is true. - bool buildAndTransmitError(util::MultiError& multiErr, std::shared_ptr const& task, bool cancelled); - - /// Put the SQL results in a TransmitData object and transmit it to the czar - /// if appropriate. - /// @ return true if there was an error. - /// Note: `cancelled` is a reference used to break the transmit loop if the calling Task - /// is cancelled. Having anything else set `cancelled` to true could result in deadlock. - bool buildAndTransmitResult(MYSQL_RES* mResult, int numFields, std::shared_ptr const& task, - bool largeResult, util::MultiError& multiErr, std::atomic& cancelled, - bool& readRowsOk); - - /// @return a log worthy string describing _transmitData. - std::string dumpTr() const; + virtual bool buildAndTransmitResult(MYSQL_RES* mResult, std::shared_ptr const& task, + util::MultiError& multiErr, std::atomic& cancelled) override; private: /// Private constructor to protect shared pointer integrity. - SendChannelShared(SendChannel::Ptr const& sendChannel, + SendChannelShared(std::shared_ptr const& sendChannel, std::shared_ptr const& transmitMgr, qmeta::CzarId czarId); - - /// Items to share one TransmitLock across all Task's using this - /// SendChannelShared. If all Task's using this channel are not - /// allowed to complete, deadlock is likely. 
- void _waitTransmitLock(bool interactive, QueryId const& qId); - - /// Wrappers for SendChannel public functions that may need to be used - /// by threads. - /// @see SendChannel::send - /// Note: _streamLock must be held before calling this function. - bool _send(char const* buf, int bufLen) { return _sendChannel->send(buf, bufLen); } - - /// @see SendChannel::sendError - /// Note: _streamLock must be held before calling this function. - bool _sendError(std::string const& msg, int code) { return _sendChannel->sendError(msg, code); } - - /// @see SendChannel::sendFile - /// Note: _streamLock must be held before calling this function. - bool _sendFile(int fd, SendChannel::Size fSize) { return _sendChannel->sendFile(fd, fSize); } - - /// @see SendChannel::sendStream - /// Note: _streamLock must be held before calling this function. - bool _sendStream(xrdsvc::StreamBuffer::Ptr const& sBuf, bool last, int scsSeq = -1) { - return _sendChannel->sendStream(sBuf, last, scsSeq); - } - - /// @see SendChannel::kill - /// Note: _streamLock must be held before calling this function. - bool _kill(std::string const& note); - - /// @return a new TransmitData::Ptr object. - TransmitData::Ptr _createTransmit(Task& task); - - /// Create a new _transmitData object if needed. - /// Note: tMtx must be held before calling. - void _initTransmit(Task& task); - - /// Try to transmit the data in tData. - /// If the queue already has at least 2 TransmitData objects, addTransmit - /// may wait before returning. Result rows are read from the - /// database until there are no more rows or the buffer is - /// sufficiently full. addTransmit waits until that buffer has been - /// sent to the czar before reading more rows. Without the wait, - /// the worker may read in too many result rows, run out of memory, - /// and crash. - /// @return true if transmit was added successfully. - /// @see SendChannelShared::_transmit code for further explanation. 
- bool _addTransmit(std::shared_ptr const& task, bool cancelled, bool erred, bool lastIn, - TransmitData::Ptr const& tData, int qId, int jId); - - /// Encode TransmitData items from _transmitQueue and pass them to XrdSsi - /// to be sent to the czar. - /// The header for the 'nextTransmit' item is appended to the result of - /// 'thisTransmit', with a specially constructed header appended for the - /// 'reallyLast' transmit. - /// The specially constructed header for the 'reallyLast' transmit just - /// says that there's no more data, this SendChannel is done. - /// _queueMtx must be held before calling this. - bool _transmit(bool erred, std::shared_ptr const& task); - - /// Send the buffer 'streamBuffer' using xrdssi. - /// 'last' should only be true if this is the last buffer to be sent with this _sendChannel. - /// 'note' is just a log note about what/who is sending the buffer. - /// @return true if the buffer was sent. - bool _sendBuf(std::lock_guard const& streamLock, xrdsvc::StreamBuffer::Ptr& streamBuf, - bool last, std::string const& note, int scsSeq); - - /// Prepare the transmit data and then call _addTransmit. - bool _prepTransmit(std::shared_ptr const& task, bool cancelled, bool lastIn); - - /// @see dumpTr() - std::string _dumpTr() const; - - /// streamMutex is used to protect _lastCount and messages that are sent - /// using SendChannelShared. - std::mutex _streamMutex; - - std::queue _transmitQueue; ///< Queue of data to be encoded and sent. - std::mutex _queueMtx; ///< protects _transmitQueue - - /// metadata buffer. Once set, it cannot change until after Finish() has been called. - std::string _metadataBuf; - - int _taskCount = 0; ///< The number of tasks to be sent over this SendChannel. - int _lastCount = 0; ///< The number of 'last' buffers received. - std::atomic _lastRecvd{false}; ///< The truly 'last' transmit message is in the queue. - std::atomic _firstTransmit{true}; ///< True until the first transmit has been sent. 
- - SendChannel::Ptr _sendChannel; ///< Used to pass encoded information to XrdSsi. - - std::atomic _firstTransmitLock{true}; ///< True until the first thread tries to lock transmitLock. - std::shared_ptr _transmitLock; ///< Hold onto transmitLock until finished. - std::mutex _transmitLockMtx; ///< protects access to _transmitLock. - std::condition_variable _transmitLockCv; - std::shared_ptr _transmitMgr; ///< Pointer to the TransmitMgr - qmeta::CzarId const _czarId; ///< id of the czar that requested this task(s). - uint64_t const _scsId; ///< id number for this SendChannelShared - std::atomic _scsSeq{0}; ///< SendChannelSharedsequence number for transmit. - - /// The number of sql connections opened to handle the Tasks using this SendChannelShared. - /// Once this is greater than 0, this object needs free access to sql connections to avoid - // system deadlock. @see SqlConnMgr::_take() and SqlConnMgr::_release(). - std::atomic _sqlConnectionCount{0}; - - /// true until getFirstChannelSqlConn() is called. 
- std::atomic _firstChannelSqlConn{true}; - - std::vector _schemaCols; - std::atomic _schemaColsSet{false}; - - std::shared_ptr _transmitData; ///< TransmitData object - mutable std::mutex _tMtx; ///< protects _transmitData - - std::shared_ptr _icPtr; ///< temporary for LockupDB }; -} // namespace wbase -} // namespace lsst::qserv +} // namespace lsst::qserv::wbase #endif // LSST_QSERV_WBASE_SENDCHANNELSHARED_H diff --git a/src/wbase/Task.cc b/src/wbase/Task.cc index f4e7845bba..3fa7b1c63d 100644 --- a/src/wbase/Task.cc +++ b/src/wbase/Task.cc @@ -35,7 +35,6 @@ #include // Third-party headers -#include "boost/regex.hpp" #include // LSST headers @@ -44,6 +43,8 @@ // Qserv headers #include "global/constants.h" #include "global/LogContext.h" +#include "global/UnsupportedError.h" +#include "mysql/MySqlConfig.h" #include "proto/TaskMsgDigest.h" #include "proto/worker.pb.h" #include "util/Bug.h" @@ -51,7 +52,7 @@ #include "util/IterableFormatter.h" #include "util/TimeUtils.h" #include "wbase/Base.h" -#include "wbase/SendChannelShared.h" +#include "wbase/ChannelShared.h" #include "wbase/UserQueryInfo.h" #include "wpublish/QueriesAndChunks.h" @@ -100,7 +101,7 @@ std::atomic taskSequence{0}; /// Command::setFunc() is used set the action later. This is why /// the util::CommandThreadPool is not called here. 
Task::Task(TaskMsgPtr const& t, int fragmentNumber, std::shared_ptr const& userQueryInfo, - size_t templateId, int subchunkId, std::shared_ptr const& sc) + size_t templateId, int subchunkId, std::shared_ptr const& sc) : _userQueryInfo(userQueryInfo), _sendChannel(sc), _tSeq(++taskSequence), @@ -190,7 +191,10 @@ Task::~Task() { } std::vector Task::createTasks(std::shared_ptr const& taskMsg, - std::shared_ptr const& sendChannel) { + std::shared_ptr const& sendChannel, + std::shared_ptr const& chunkResourceMgr, + mysql::MySqlConfig const& mySqlConfig, + std::shared_ptr const& sqlConnMgr) { QueryId qId = taskMsg->queryid(); QSERV_LOGCONTEXT_QUERY_JOB(qId, taskMsg->jobid()); std::vector vect; @@ -222,6 +226,39 @@ std::vector Task::createTasks(std::shared_ptr const& } } } + for (auto task : vect) { + /// Set the function called when it is time to process the task. + auto func = [task, chunkResourceMgr, mySqlConfig, sqlConnMgr](util::CmdData*) { + proto::TaskMsg const& msg = *task->msg; + int const resultProtocol = 2; // See proto/worker.proto Result protocol + if (!msg.has_protocol() || msg.protocol() < resultProtocol) { + LOGS(_log, LOG_LVL_WARN, "processMsg Unsupported wire protocol"); + if (!task->checkCancelled()) { + // We should not send anything back to xrootd if the task has been cancelled. + task->getSendChannel()->sendError("Unsupported wire protocol", 1); + } + } else { + auto qr = wdb::QueryRunner::newQueryRunner(task, chunkResourceMgr, mySqlConfig, sqlConnMgr); + bool success = false; + try { + success = qr->runQuery(); + } catch (UnsupportedError const& e) { + LOGS(_log, LOG_LVL_ERROR, "runQuery threw UnsupportedError " << e.what() << *task); + } + if (not success) { + LOGS(_log, LOG_LVL_ERROR, "runQuery failed " << *task); + if (not task->getSendChannel()->kill("Foreman::_setRunFunc")) { + LOGS(_log, LOG_LVL_WARN, "runQuery sendChannel killed"); + } + } + } + // Transmission is done, but 'task' contains statistics that are still useful. 
+ // However, the resources used by sendChannel need to be freed quickly. + // The QueryRunner class access to sendChannel for results is over by this point. + task->resetSendChannel(); // Frees its xrdsvc::SsiRequest object. + }; + task->setFunc(func); + } sendChannel->setTaskCount(vect.size()); return vect; diff --git a/src/wbase/Task.h b/src/wbase/Task.h index d0e15c8e0e..a8e575b6d9 100644 --- a/src/wbase/Task.h +++ b/src/wbase/Task.h @@ -46,18 +46,25 @@ #include "wbase/TaskState.h" #include "util/Histogram.h" #include "util/ThreadPool.h" -#include "util/threadSafe.h" // Forward declarations namespace lsst::qserv { +namespace mysql { +class MySqlConfig; +} // namespace mysql namespace proto { class TaskMsg; class TaskMsg_Fragment; } // namespace proto namespace wbase { -struct ScriptMeta; -class SendChannelShared; +class ChannelShared; } // namespace wbase +namespace wcontrol { +class SqlConnMgr; +} // namespace wcontrol +namespace wdb { +class ChunkResourceMgr; +} // namespace wdb namespace wpublish { class QueryStatistics; } @@ -150,19 +157,22 @@ class Task : public util::CommandForThreadPool { }; Task(TaskMsgPtr const& t, int fragmentNumber, std::shared_ptr const& userQueryInfo, - size_t templateId, int subchunkId, std::shared_ptr const& sc); + size_t templateId, int subchunkId, std::shared_ptr const& sc); Task& operator=(const Task&) = delete; Task(const Task&) = delete; virtual ~Task(); /// Read 'taskMsg' to generate a vector of one or more task objects all using the same 'sendChannel' static std::vector createTasks(std::shared_ptr const& taskMsg, - std::shared_ptr const& sendChannel); + std::shared_ptr const& sendChannel, + std::shared_ptr const& chunkResourceMgr, + mysql::MySqlConfig const& mySqlConfig, + std::shared_ptr const& sqlConnMgr); void setQueryStatistics(std::shared_ptr const& qC); - std::shared_ptr getSendChannel() const { return _sendChannel; } - void resetSendChannel() { _sendChannel.reset(); } ///< reset the shared pointer for 
SendChannelShared + std::shared_ptr getSendChannel() const { return _sendChannel; } + void resetSendChannel() { _sendChannel.reset(); } ///< reset the shared pointer for ChannelShared std::string user; ///< Incoming username // Note that manpage spec of "26 bytes" is insufficient @@ -269,8 +279,8 @@ class Task : public util::CommandForThreadPool { const IntVector& getSubchunksVect() const { return _dbTblsAndSubchunks->subchunksVect; } private: - std::shared_ptr _userQueryInfo; ///< Details common to Tasks in this UserQuery. - std::shared_ptr _sendChannel; ///< Send channel. + std::shared_ptr _userQueryInfo; ///< Details common to Tasks in this UserQuery. + std::shared_ptr _sendChannel; ///< Send channel. uint64_t const _tSeq = 0; ///< identifier for the specific task QueryId const _qId = 0; ///< queryId from czar diff --git a/src/wbase/TransmitData.cc b/src/wbase/TransmitData.cc index 936a8eec12..bf78082744 100644 --- a/src/wbase/TransmitData.cc +++ b/src/wbase/TransmitData.cc @@ -50,23 +50,21 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.TransmitData"); namespace lsst::qserv::wbase { -std::atomic seqSource{0}; +atomic seqSource{0}; -TransmitData::TransmitData(qmeta::CzarId const& czarId_, shared_ptr const& arena, - std::string const& idStr) - : _czarId(czarId_), _arena(arena), _idStr(idStr), _trSeq(seqSource++) { - _header = _createHeader(); - _result = _createResult(); +TransmitData::Ptr TransmitData::createTransmitData(qmeta::CzarId const& czarId_, string const& idStr) { + return shared_ptr(new TransmitData(czarId_, make_shared(), idStr)); } -TransmitData::Ptr TransmitData::createTransmitData(qmeta::CzarId const& czarId_, string const& idStr) { - shared_ptr arena = make_shared(); - auto ptr = shared_ptr(new TransmitData(czarId_, arena, idStr)); - return ptr; +TransmitData::TransmitData(qmeta::CzarId const& czarId_, shared_ptr const& arena, + string const& idStr) + : _czarId(czarId_), _arena(arena), _idStr(idStr), _trSeq(seqSource++) { + lock_guard const 
lock(_trMtx); + _header = _createHeader(lock); + _result = _createResult(lock); } -/// Note: _trMtx must be held before calling this. -proto::ProtoHeader* TransmitData::_createHeader() { +proto::ProtoHeader* TransmitData::_createHeader(lock_guard const& lock) { proto::ProtoHeader* hdr = google::protobuf::Arena::CreateMessage(_arena.get()); hdr->set_protocol(2); // protocol 2: row-by-row message hdr->set_size(0); @@ -76,15 +74,15 @@ proto::ProtoHeader* TransmitData::_createHeader() { return hdr; } -proto::Result* TransmitData::_createResult() { +proto::Result* TransmitData::_createResult(lock_guard const& lock) { proto::Result* rst = google::protobuf::Arena::CreateMessage(_arena.get()); return rst; } void TransmitData::attachNextHeader(TransmitData::Ptr const& nextTr, bool reallyLast, uint32_t seq, int scsSeq) { - _icPtr = std::make_shared(_idStr + "_td_LDB_" + std::to_string(reallyLast)); - lock_guard lock(_trMtx); + _icPtr = make_shared(_idStr + "_td_LDB_" + to_string(reallyLast)); + lock_guard const lock(_trMtx); if (_result == nullptr) { throw util::Bug(ERR_LOC, _idStr + "_transmitLoop() had nullptr result!"); } @@ -92,14 +90,14 @@ void TransmitData::attachNextHeader(TransmitData::Ptr const& nextTr, bool really string nextHeaderString; if (reallyLast) { // Need a special header to indicate there are no more messages. - LOGS(_log, LOG_LVL_TRACE, _dump() << " attachNextHeader reallyLast=" << reallyLast); + LOGS(_log, LOG_LVL_TRACE, _dump(lock) << " attachNextHeader reallyLast=" << reallyLast); // this _tMtx is already locked, so call private member - nextHeaderString = _makeHeaderString(reallyLast, seq, scsSeq); + nextHeaderString = _makeHeaderString(lock, reallyLast, seq, scsSeq); } else { // Need the header from the next TransmitData object in the queue. // Using public version to lock its mutex. 
LOGS(_log, LOG_LVL_TRACE, - _dump() << "attachNextHeader reallyLast=" << reallyLast << " next=" << nextTr->dump()); + _dump(lock) << "attachNextHeader reallyLast=" << reallyLast << " next=" << nextTr->dump()); // next _tMtx is not locked, so call public member nextHeaderString = nextTr->makeHeaderString(reallyLast, seq, scsSeq); } @@ -108,17 +106,17 @@ void TransmitData::attachNextHeader(TransmitData::Ptr const& nextTr, bool really } string TransmitData::makeHeaderString(bool reallyLast, uint32_t seq, int scsSeq) { - lock_guard lock(_trMtx); - return _makeHeaderString(reallyLast, seq, scsSeq); + lock_guard const lock(_trMtx); + return _makeHeaderString(lock, reallyLast, seq, scsSeq); } -string TransmitData::_makeHeaderString(bool reallyLast, uint32_t seq, int scsSeq) { - // Note: _trMtx must be held before calling this. +string TransmitData::_makeHeaderString(lock_guard const& lock, bool reallyLast, uint32_t seq, + int scsSeq) { proto::ProtoHeader* pHeader; if (reallyLast) { // Create a header for an empty dataMsg using the protobuf arena from thisTransmit. // This is the signal to the czar that this SharedSendChannel is finished. 
- pHeader = _createHeader(); + pHeader = _createHeader(lock); } else { pHeader = _header; } @@ -131,7 +129,7 @@ string TransmitData::_makeHeaderString(bool reallyLast, uint32_t seq, int scsSeq } string TransmitData::getHeaderString(uint32_t seq, int scsSeq) { - lock_guard lock(_trMtx); + lock_guard const lock(_trMtx); proto::ProtoHeader* thisPHdr = _header; thisPHdr->set_seq(seq); thisPHdr->set_scsseq(scsSeq); // should always be 0 @@ -141,14 +139,13 @@ string TransmitData::getHeaderString(uint32_t seq, int scsSeq) { } xrdsvc::StreamBuffer::Ptr TransmitData::getStreamBuffer(Task::Ptr const& task) { - lock_guard lock(_trMtx); + lock_guard const lock(_trMtx); // createWithMove invalidates _dataMsg return xrdsvc::StreamBuffer::createWithMove(_dataMsg, task); } void TransmitData::_buildHeader(lock_guard const& lock) { LOGS(_log, LOG_LVL_DEBUG, _idStr << "TransmitData::_buildHeader"); - // The size of the dataMsg must include space for the header for the next dataMsg. _header->set_size(_dataMsg.size() + proto::ProtoHeaderWrap::getProtoHeaderSize()); // The md5 hash must not include the header for the next dataMsg. 
@@ -158,6 +155,7 @@ void TransmitData::_buildHeader(lock_guard const& lock) { void TransmitData::buildDataMsg(Task const& task, util::MultiError& multiErr) { QSERV_LOGCONTEXT_QUERY_JOB(task.getQueryId(), task.getJobId()); + lock_guard const lock(_trMtx); LOGS(_log, LOG_LVL_INFO, _idStr << "TransmitData::_buildDataMsg rowCount=" << _rowCount << " tSize=" << _tSize); assert(_result != nullptr); @@ -179,48 +177,24 @@ void TransmitData::buildDataMsg(Task const& task, util::MultiError& multiErr) { _buildHeader(lock); } -void TransmitData::initResult(Task& task, std::vector& schemaCols) { - lock_guard lock(_trMtx); +void TransmitData::initResult(Task& task) { + lock_guard const lock(_trMtx); _result->set_queryid(task.getQueryId()); _result->set_jobid(task.getJobId()); if (task.getSession() >= 0) { _result->set_session(task.getSession()); } - // If no queries have been run, schemaCols will be empty at this point. - if (!schemaCols.empty()) { - _addSchemaCols(schemaCols); - } } bool TransmitData::hasErrormsg() const { return _result->has_errormsg(); } -void TransmitData::addSchemaCols(std::vector& schemaCols) { - lock_guard lock(_trMtx); - _addSchemaCols(schemaCols); -} - -void TransmitData::_addSchemaCols(std::vector& schemaCols) { - // Load schema from _schemaCols into _result, this should only happen once - // per TransmitData object. 
- if (_schemaColsSet.exchange(true) == false) { - for (auto&& col : schemaCols) { - proto::ColumnSchema* cs = _result->mutable_rowschema()->add_columnschema(); - cs->set_name(col.colName); - cs->set_sqltype(col.colSqlType); - cs->set_mysqltype(col.colMysqlType); - } - } else { - LOGS(_log, LOG_LVL_WARN, _idStr << "TransmitData::_addSchemaCols called multiple times."); - } -} - -bool TransmitData::fillRows(MYSQL_RES* mResult, int numFields, size_t& sz) { - lock_guard lock(_trMtx); +bool TransmitData::fillRows(MYSQL_RES* mResult, size_t& sz) { + lock_guard const lock(_trMtx); MYSQL_ROW row; - unsigned int szLimit = std::min(proto::ProtoHeaderWrap::PROTOBUFFER_DESIRED_LIMIT, - proto::ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT); - + int const numFields = mysql_num_fields(mResult); + unsigned int szLimit = min(proto::ProtoHeaderWrap::PROTOBUFFER_DESIRED_LIMIT, + proto::ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT); while ((row = mysql_fetch_row(mResult))) { auto lengths = mysql_fetch_lengths(mResult); proto::RowBundle* rawRow = _result->add_row(); @@ -247,16 +221,26 @@ bool TransmitData::fillRows(MYSQL_RES* mResult, int numFields, size_t& sz) { } int TransmitData::getResultSize() const { - lock_guard lock(_trMtx); + lock_guard const lock(_trMtx); return _dataMsg.size(); } int TransmitData::getResultRowCount() const { - lock_guard lock(_trMtx); + lock_guard const lock(_trMtx); return _rowCount; } -string TransmitData::_dump() const { +string TransmitData::dump() const { + lock_guard const lock(_trMtx); + return _dump(lock); +} + +string TransmitData::dataMsg() const { + lock_guard const lock(_trMtx); + return _dataMsg; +} + +string TransmitData::_dump(lock_guard const& lock) const { string str = string(" trDump ") + _idStr + " trSeq=" + to_string(_trSeq) + " hdr="; if (_header != nullptr) { str += to_string(_header->size()); diff --git a/src/wbase/TransmitData.h b/src/wbase/TransmitData.h index 5c1e9f1cea..7f0e438d10 100644 --- a/src/wbase/TransmitData.h +++ 
b/src/wbase/TransmitData.h @@ -36,20 +36,22 @@ namespace google::protobuf { class Arena; -} // namespace google::protobuf +} namespace lsst::qserv::util { class InstanceCount; class MultiError; } // namespace lsst::qserv::util -namespace xrdsvc { +namespace lsst::qserv::wbase { +class Task; +} + +namespace lsst::qserv::xrdsvc { class StreamBuffer; } -namespace wbase { - -class Task; +namespace lsst::qserv::wbase { /// This class is used to store information needed for one transmit. /// The data may be for result rows or an error message. @@ -64,8 +66,11 @@ class TransmitData { /// Create a transmitData object static Ptr createTransmitData(qmeta::CzarId const& czarId_, std::string const& idStr); + qmeta::CzarId getCzarId() const { return _czarId; } + std::string getIdStr() const { return _idStr; } + /// Initialize the result. - void initResult(Task& task, std::vector& schemaCols); + void initResult(Task& task); /// @return a string representation of this transmit object's header /// generated by protobufs. @@ -87,19 +92,12 @@ class TransmitData { /// @return the protobuf string for the header. std::string makeHeaderString(bool reallyLast, uint32_t seq, int scsSeq); - qmeta::CzarId getCzarId() const { return _czarId; } - /// Fill one row in the _result msg from one row in MYSQL_RES* 'mResult' /// If the message has gotten larger than the desired message size, /// return false. /// @return false if there ARE MORE ROWS left in mResult. /// true if there are no more rows remaining in mResult. - bool fillRows(MYSQL_RES* mResult, int numFields, size_t& sz); - - /// Add the schema to this TransmitData object. - /// The schema is always the same for a query, so it will only be added the - /// first time this function is called for the instance. - void addSchemaCols(std::vector& schemaCols); + bool fillRows(MYSQL_RES* mResult, size_t& sz); /// Use the information collected in _result and multiErr to build _dataMsg. 
void buildDataMsg(Task const& task, util::MultiError& multiErr); @@ -110,16 +108,14 @@ class TransmitData { /// @return the size of the result in bytes. int getResultSize() const; - /// Return the number of rows in the result. + /// @return the number of rows in the result. int getResultRowCount() const; - std::string getIdStr() const { return _idStr; } - /// @return a log worthy description of this object. - std::string dump() const { - std::lock_guard lock(_trMtx); - return _dump(); - } + std::string dump() const; + + /// @return a copy of the data message + std::string dataMsg() const; private: TransmitData(qmeta::CzarId const& czarId, std::shared_ptr const& arena, @@ -130,48 +126,46 @@ class TransmitData { /// case where an empty header is needed to append to the result. /// This should only be appended to the result of this->result as /// the '_arena' will be deallocated when this object is destroyed. - /// Note: _trMtx must be held before calling this. - proto::ProtoHeader* _createHeader(); + /// @param lock - on _trMtx must be held before calling this methid. + proto::ProtoHeader* _createHeader(std::lock_guard const& lock); - /// Note: _trMtx must be held before calling this. - std::string _makeHeaderString(bool reallyLast, uint32_t seq, int scsSeq); + /// @param lock - on _trMtx must be held before calling this methid. + std::string _makeHeaderString(std::lock_guard const& lock, bool reallyLast, uint32_t seq, + int scsSeq); /// @param lock - on _trMtx must be held before calling this methid. void _buildHeader(std::lock_guard const& lock); /// @see dump() - std::string _dump() const; - - // proto objects are instantiated as part of google protobuf arenas - // and should not be deleted. They are deleted when the arena is deleted. - proto::ProtoHeader* _header = nullptr; - proto::Result* _result = nullptr; - - /// Serialized string for result that is appended with wrapped string for headerNext. 
- std::string _dataMsg; - - qmeta::CzarId const _czarId; - - mutable std::mutex _trMtx; ///< Protects all private member variables. - std::atomic _schemaColsSet{false}; ///< Set to true when schema columns are set. - - unsigned int _rowCount = 0; ///< Number of rows in the _result so far. - size_t _tSize = 0; ///< Approximate number of bytes in the _result so far. + /// @param lock - on _trMtx must be held before calling this methid. + std::string _dump(std::lock_guard const& lock) const; /// Create a result using our arena. /// This does not set the 'result' member of this object for consistency. - proto::Result* _createResult(); - - std::shared_ptr _arena; + /// @param lock - on _trMtx must be held before calling this methid. + proto::Result* _createResult(std::lock_guard const& lock); + // Input parameters + qmeta::CzarId const _czarId; + std::shared_ptr const _arena; std::string const _idStr; - int const _trSeq; ///< Identifier for this object, used for debugging. + // Proto objects are instantiated as part of google protobuf arenas + // and should not be deleted. They are deleted when the arena is deleted. + proto::ProtoHeader* _header = nullptr; + proto::Result* _result = nullptr; + + std::string _dataMsg; ///< Serialized string for result that is appended with wrapped + /// string for headerNext. + unsigned int _rowCount = 0; ///< Number of rows in the _result so far. + size_t _tSize = 0; ///< Approximate number of bytes in the _result so far. + int const _trSeq; ///< Identifier for this object, used for debugging. std::shared_ptr _icPtr; // LockupDB + + mutable std::mutex _trMtx; ///< Protects all private member variables. 
}; -} // namespace wbase -} // namespace lsst::qserv +} // namespace lsst::qserv::wbase #endif // LSST_QSERV_WBASE_TRANSMITDATA_H diff --git a/src/wcontrol/Foreman.cc b/src/wcontrol/Foreman.cc index dd6da2845e..0f063d901f 100644 --- a/src/wcontrol/Foreman.cc +++ b/src/wcontrol/Foreman.cc @@ -26,28 +26,18 @@ // System headers #include -#include -#include -#include -#include -#include -#include // LSST headers #include "lsst/log/Log.h" // Qserv headers -#include "global/UnsupportedError.h" #include "mysql/MySqlConfig.h" -#include "proto/worker.pb.h" -#include "wbase/Base.h" -#include "wbase/SendChannelShared.h" #include "wbase/WorkerCommand.h" #include "wcontrol/SqlConnMgr.h" -#include "wcontrol/TransmitMgr.h" #include "wcontrol/WorkerStats.h" #include "wdb/ChunkResource.h" -#include "wdb/QueryRunner.h" +#include "wdb/SQLBackend.h" +#include "wpublish/QueriesAndChunks.h" namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.wcontrol.Foreman"); @@ -59,16 +49,21 @@ namespace lsst::qserv::wcontrol { Foreman::Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigned int maxPoolThreads, mysql::MySqlConfig const& mySqlConfig, wpublish::QueriesAndChunks::Ptr const& queries, - wcontrol::SqlConnMgr::Ptr const& sqlConnMgr) - - : _scheduler(scheduler), _mySqlConfig(mySqlConfig), _queries(queries), _sqlConnMgr(sqlConnMgr) { + std::shared_ptr const& sqlConnMgr, + std::shared_ptr const& transmitMgr, + wconfig::WorkerConfig const& workerConfig) + : _scheduler(scheduler), + _mySqlConfig(mySqlConfig), + _queries(queries), + _sqlConnMgr(sqlConnMgr), + _transmitMgr(transmitMgr), + _workerConfig(workerConfig) { // Make the chunk resource mgr // Creating backend makes a connection to the database for making temporary tables. // It will delete temporary tables that it can identify as being created by a worker. // Previous instances of the worker will terminate when they try to use or create temporary tables. 
// Previous instances of the worker should be terminated before a new worker is started. - _backend = make_shared(_mySqlConfig); - _chunkResourceMgr = wdb::ChunkResourceMgr::newMgr(_backend); + _chunkResourceMgr = wdb::ChunkResourceMgr::newMgr(make_shared(_mySqlConfig)); assert(_scheduler); // Cannot operate without scheduler. @@ -90,39 +85,9 @@ Foreman::~Foreman() { _pool->shutdownPool(); } -void Foreman::_setRunFunc(shared_ptr const& task) { - // If there are no problems, this lambda function creates - // a QueryRunner instance for this Task and then runs - // QueryRunner::runQuery() for the Task. - auto func = [this, task](util::CmdData*) { - auto qr = wdb::QueryRunner::newQueryRunner(task, _chunkResourceMgr, _mySqlConfig, _sqlConnMgr); - bool success = false; - try { - success = qr->runQuery(); - } catch (UnsupportedError const& e) { - LOGS(_log, LOG_LVL_ERROR, "runQuery threw UnsupportedError " << e.what() << *task); - } - if (not success) { - LOGS(_log, LOG_LVL_ERROR, "runQuery failed " << *task); - if (not task->getSendChannel()->kill("Foreman::_setRunFunc")) { - LOGS(_log, LOG_LVL_WARN, "runQuery sendChannel killed"); - } - } - - // Transmission is done, but 'task' contains statistics that are still useful. - // However, the resources used by sendChannel need to be freed quickly. - // The QueryRunner class access to sendChannel for results is over by this point. - task->resetSendChannel(); // Frees its xrdsvc::SsiRequest object. - }; - - task->setFunc(func); -} - -/// Put the task on the scheduler to be run later. 
void Foreman::processTasks(vector const& tasks) { std::vector cmds; for (auto const& task : tasks) { - _setRunFunc(task); _queries->addTask(task); cmds.push_back(task); } diff --git a/src/wcontrol/Foreman.h b/src/wcontrol/Foreman.h index 3da02fed23..8796b425a9 100644 --- a/src/wcontrol/Foreman.h +++ b/src/wcontrol/Foreman.h @@ -33,12 +33,11 @@ #include "nlohmann/json.hpp" // Qserv headers -#include "mysql/MySqlConfig.h" #include "util/EventThread.h" #include "util/HoldTrack.h" #include "wbase/Base.h" #include "wbase/MsgProcessor.h" -#include "wpublish/QueriesAndChunks.h" +#include "wbase/Task.h" // Forward declarations @@ -46,15 +45,27 @@ namespace lsst::qserv::wbase { struct TaskSelector; } // namespace lsst::qserv::wbase +namespace lsst::qserv::mysql { +class MySqlConfig; +} // namespace lsst::qserv::mysql + +namespace lsst::qserv::wconfig { +class WorkerConfig; +} // namespace lsst::qserv::wconfig + namespace lsst::qserv::wdb { -class SQLBackend; class ChunkResourceMgr; class QueryRunner; } // namespace lsst::qserv::wdb +namespace lsst::qserv::wpublish { +class QueriesAndChunks; +} // namespace lsst::qserv::wpublish + namespace lsst::qserv::wcontrol { class SqlConnMgr; +class TransmitMgr; /// An abstract scheduler interface. Foreman objects use Scheduler instances /// to determine what tasks to launch upon triggering events. 
@@ -84,14 +95,19 @@ class Scheduler : public wbase::TaskScheduler, public util::CommandQueue { class Foreman : public wbase::MsgProcessor { public: /** - * @param scheduler - pointer to the scheduler - * @param poolSize - size of the thread pool - * @param mySqlConfig - configuration object for the MySQL service - * @param queries - query statistics collector + * @param scheduler - pointer to the scheduler + * @param poolSize - size of the thread pool + * @param mySqlConfig - configuration object for the MySQL service + * @param queries - query statistics collector + * @param sqlConnMgr - for limiting the number of MySQL connections used for tasks + * @param transmitMgr - for throttling outgoing massages to prevent czars from being overloaded + * @param workerConfig - worker configuration parameters */ Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigned int maxPoolThreads, - mysql::MySqlConfig const& mySqlConfig, wpublish::QueriesAndChunks::Ptr const& queries, - std::shared_ptr const& sqlConnMgr); + mysql::MySqlConfig const& mySqlConfig, std::shared_ptr const& queries, + std::shared_ptr const& sqlConnMgr, + std::shared_ptr const& transmitMgr, + wconfig::WorkerConfig const& workerConfig); virtual ~Foreman() override; @@ -100,8 +116,14 @@ class Foreman : public wbase::MsgProcessor { Foreman(Foreman const&) = delete; Foreman& operator=(Foreman const&) = delete; + std::shared_ptr const& chunkResourceMgr() const { return _chunkResourceMgr; } + mysql::MySqlConfig const& mySqlConfig() const { return _mySqlConfig; } + std::shared_ptr const& sqlConnMgr() const { return _sqlConnMgr; } + std::shared_ptr const& transmitMgr() const { return _transmitMgr; } + wconfig::WorkerConfig const& workerConfig() const { return _workerConfig; } + /// Process a group of query processing tasks. 
- /// @see sgProcessor::processTasks() + /// @see MsgProcessor::processTasks() void processTasks(std::vector> const& tasks) override; /// Implement the corresponding method of the base class @@ -113,10 +135,6 @@ class Foreman : public wbase::MsgProcessor { virtual nlohmann::json statusToJson(wbase::TaskSelector const& taskSelector) override; private: - /// Set the function called when it is time to process the task. - void _setRunFunc(std::shared_ptr const& task); - - std::shared_ptr _backend; std::shared_ptr _chunkResourceMgr; util::ThreadPool::Ptr _pool; @@ -125,13 +143,19 @@ class Foreman : public wbase::MsgProcessor { util::CommandQueue::Ptr _workerCommandQueue; ///< dedicated queue for the worker commands util::ThreadPool::Ptr _workerCommandPool; ///< dedicated pool for executing worker commands - mysql::MySqlConfig const _mySqlConfig; - wpublish::QueriesAndChunks::Ptr _queries; + mysql::MySqlConfig const& _mySqlConfig; + std::shared_ptr const _queries; /// For limiting the number of MySQL connections used for tasks. - std::shared_ptr _sqlConnMgr; + std::shared_ptr const _sqlConnMgr; util::HoldTrack::Mark::Ptr _mark; + + /// Used to throttle outgoing massages to prevent czars from being overloaded. + std::shared_ptr const _transmitMgr; + + /// Worker configuration parameters. 
+ wconfig::WorkerConfig const& _workerConfig; }; } // namespace lsst::qserv::wcontrol diff --git a/src/wcontrol/SqlConnMgr.cc b/src/wcontrol/SqlConnMgr.cc index c7b3be6722..c7eaae0409 100644 --- a/src/wcontrol/SqlConnMgr.cc +++ b/src/wcontrol/SqlConnMgr.cc @@ -26,7 +26,7 @@ // qserv headers #include "util/Bug.h" -#include "wbase/SendChannelShared.h" +#include "wbase/ChannelShared.h" #include "lsst/log/Log.h" @@ -49,7 +49,7 @@ nlohmann::json SqlConnMgr::statusToJson() const { } SqlConnMgr::ConnType SqlConnMgr::_take(bool scanQuery, - std::shared_ptr const& sendChannelShared, + std::shared_ptr const& sendChannelShared, bool firstChannelSqlConn) { ++_totalCount; LOGS(_log, LOG_LVL_DEBUG, "SqlConnMgr take " << dump()); @@ -63,7 +63,7 @@ SqlConnMgr::ConnType SqlConnMgr::_take(bool scanQuery, // normal shared scan, low priority as far as SqlConnMgr is concerned. connType = SCAN; } else { - // SendChannelShared, every SQL connection after the first one. + // ChannelShared, every SQL connection after the first one. // High priority to SqlConnMgr as these need to run to free up resources. if (sendChannelShared != nullptr) { connType = SHARED; @@ -118,7 +118,7 @@ void SqlConnMgr::_release(SqlConnMgr::ConnType connType) { // Decrementing the sendChannelShared count could result in the count // being 0 before all transmits on the sendChannelShared have finished, // causing _take() to block when it really should not. - // When the SendChannelShared is finished, it is thrown away, effectively + // When the ChannelShared is finished, it is thrown away, effectively // clearing its count. 
LOGS(_log, LOG_LVL_DEBUG, "SqlConnMgr release " << dump()); if (connType == SCAN) { @@ -154,7 +154,7 @@ string SqlConnMgr::dump() const { ostream& operator<<(ostream& os, SqlConnMgr const& mgr) { return mgr.dump(os); } SqlConnLock::SqlConnLock(SqlConnMgr& sqlConnMgr, bool scanQuery, - std::shared_ptr const& sendChannelShared) + std::shared_ptr const& sendChannelShared) : _sqlConnMgr(sqlConnMgr) { bool firstChannelSqlConn = true; if (sendChannelShared != nullptr) { diff --git a/src/wcontrol/SqlConnMgr.h b/src/wcontrol/SqlConnMgr.h index 97acc1b0c1..e07fed7edd 100644 --- a/src/wcontrol/SqlConnMgr.h +++ b/src/wcontrol/SqlConnMgr.h @@ -39,7 +39,7 @@ namespace lsst::qserv { namespace wbase { -class SendChannelShared; +class ChannelShared; } namespace wcontrol { @@ -99,7 +99,7 @@ class SqlConnMgr { friend class SqlConnLock; private: - ConnType _take(bool scanQuery, std::shared_ptr const& sendChannelShared, + ConnType _take(bool scanQuery, std::shared_ptr const& sendChannelShared, bool firstChannelSqlConn); void _release(ConnType connType); @@ -116,7 +116,7 @@ class SqlConnMgr { class SqlConnLock { public: SqlConnLock(SqlConnMgr& sqlConnMgr, bool scanQuery, - std::shared_ptr const& sendChannelShared); + std::shared_ptr const& sendChannelShared); SqlConnLock() = delete; SqlConnLock(SqlConnLock const&) = delete; SqlConnLock& operator=(SqlConnLock const&) = delete; diff --git a/src/wcontrol/WorkerStats.h b/src/wcontrol/WorkerStats.h index befd3c1c53..d61f450330 100644 --- a/src/wcontrol/WorkerStats.h +++ b/src/wcontrol/WorkerStats.h @@ -80,7 +80,7 @@ class WorkerStats : std::enable_shared_from_this { static util::Mutex _globalMtx; ///< Protects `_globalWorkerStats` std::atomic _queueCount{ - 0}; ///< Number of buffers on queues (there are many queues, one per SendChannelShared) + 0}; ///< Number of buffers on queues (there are many queues, one per ChannelShared) std::atomic _xrootdCount{0}; ///< Number of buffers held by xrootd. 
util::Histogram::Ptr _histConcurrentQueuedBuffers; ///< How many buffers are queued at a given time util::Histogram::Ptr _histXrootdOwnedBuffers; ///< How many of these buffers xrootd has at a given time diff --git a/src/wdb/QueryRunner.cc b/src/wdb/QueryRunner.cc index fd207cea7f..9114b585b9 100644 --- a/src/wdb/QueryRunner.cc +++ b/src/wdb/QueryRunner.cc @@ -64,9 +64,11 @@ #include "util/Timer.h" #include "util/threadSafe.h" #include "wbase/Base.h" -#include "wbase/SendChannelShared.h" +#include "wbase/ChannelShared.h" +#include "wcontrol/SqlConnMgr.h" #include "wdb/ChunkResource.h" #include "wpublish/QueriesAndChunks.h" +#include "xrdsvc/StreamBuffer.h" namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.wdb.QueryRunner"); @@ -260,11 +262,6 @@ class ChunkResourceRequest { bool QueryRunner::_dispatchChannel() { bool erred = false; - int numFields = -1; - // readRowsOk remains true as long as there are no problems with reading/transmitting. - // However, if it gets set to false, _mysqlConn->freeResult() needs to be - // called before this function exits. - bool readRowsOk = true; bool needToFreeRes = false; // set to true once there are results to be freed. // Collect the result in _transmitData. When a reasonable amount of data has been collected, // or there are no more rows to collect, pass _transmitData to _sendChannel. @@ -318,13 +315,9 @@ bool QueryRunner::_dispatchChannel() { // Transition task's state to the next one (reading data from MySQL and sending them to Czar). _task->queried(); - // Pass all information on to the shared object to add on to - // an existing message or build a new one as needed. - // Note that _cancelled is passed as reference so changing _cancelled will stop transmits. - if (_task->getSendChannel()->buildAndTransmitResult(res, numFields, _task, _largeResult, - _multiError, _cancelled, readRowsOk)) { - erred = true; - } + // Pass all information on to the shared object to add on to + // an existing message or build a new one as needed. 
+ erred = _task->getSendChannel()->buildAndTransmitResult(res, _task, _multiError, _cancelled); // ATTENTION: This call is needed to record the _actual_ completion time of the task. // It rewrites the finish timestamp within the task that was made when the task got @@ -353,13 +346,6 @@ bool QueryRunner::_dispatchChannel() { // either case resources need to be freed. _mysqlConn->freeResult(); } - if (!readRowsOk) { - // This means a there was a transmit error and there's no way to - // send anything to the czar. However, there were mysql results - // that needed to be freed (see needToFree above). - LOGS(_log, LOG_LVL_ERROR, "Failed to read and transmit rows."); - return false; - } // Transmit errors, if needed. if (!_cancelled && _multiError.size() > 0) { LOGS(_log, LOG_LVL_WARN, "Transmitting error " << _task->getIdStr()); diff --git a/src/wdb/QueryRunner.h b/src/wdb/QueryRunner.h index ae2cc0e7b5..e595c3e74b 100644 --- a/src/wdb/QueryRunner.h +++ b/src/wdb/QueryRunner.h @@ -40,23 +40,13 @@ // Qserv headers #include "mysql/MySqlConfig.h" #include "mysql/MySqlConnection.h" +#include "qmeta/types.h" #include "util/MultiError.h" #include "wbase/Task.h" -#include "wbase/TransmitData.h" -#include "wcontrol/SqlConnMgr.h" #include "wdb/ChunkResource.h" namespace lsst::qserv { -namespace proto { -class ProtoHeader; -class Result; -} // namespace proto - -namespace util { -class TimerHistogram; -} - namespace xrdsvc { class StreamBuffer; } diff --git a/src/wdb/testQueryRunner.cc b/src/wdb/testQueryRunner.cc index d944a93db2..b8d027cfe6 100644 --- a/src/wdb/testQueryRunner.cc +++ b/src/wdb/testQueryRunner.cc @@ -28,6 +28,7 @@ */ // Qserv headers +#include "mysql/MySqlConfig.h" #include "proto/worker.pb.h" #include "proto/ProtoImporter.h" #include "util/StringHash.h" @@ -62,8 +63,8 @@ using lsst::qserv::proto::TaskMsg_Subchunk; using lsst::qserv::wbase::SendChannel; using lsst::qserv::wbase::SendChannelShared; using lsst::qserv::wbase::Task; -using 
lsst::qserv::wbase::SendChannel; using lsst::qserv::wconfig::WorkerConfig; +using lsst::qserv::wcontrol::SqlConnMgr; using lsst::qserv::wcontrol::TransmitMgr; using lsst::qserv::wdb::ChunkResource; using lsst::qserv::wdb::ChunkResourceMgr; @@ -104,30 +105,30 @@ struct Fixture { BOOST_FIXTURE_TEST_SUITE(Basic, Fixture) BOOST_AUTO_TEST_CASE(Simple) { + WorkerConfig::create(); shared_ptr msg(newTaskMsg()); shared_ptr sendC(SendChannel::newNopChannel()); auto sc = SendChannelShared::create(sendC, locTransmitMgr, 1); - WorkerConfig::create(); - auto taskVect = Task::createTasks(msg, sc); - Task::Ptr task = taskVect[0]; FakeBackend::Ptr backend = make_shared(); shared_ptr crm = ChunkResourceMgr::newMgr(backend); SqlConnMgr::Ptr sqlConnMgr = make_shared(20, 15); + auto taskVect = Task::createTasks(msg, sc, crm, newMySqlConfig(), sqlConnMgr); + Task::Ptr task = taskVect[0]; QueryRunner::Ptr a(QueryRunner::newQueryRunner(task, crm, newMySqlConfig(), sqlConnMgr)); BOOST_CHECK(a->runQuery()); } BOOST_AUTO_TEST_CASE(Output) { + WorkerConfig::create(); string out; shared_ptr msg(newTaskMsg()); shared_ptr sendC(SendChannel::newStringChannel(out)); auto sc = SendChannelShared::create(sendC, locTransmitMgr, 1); - WorkerConfig::create(); - auto taskVect = Task::createTasks(msg, sc); - Task::Ptr task = taskVect[0]; FakeBackend::Ptr backend = make_shared(); shared_ptr crm = ChunkResourceMgr::newMgr(backend); SqlConnMgr::Ptr sqlConnMgr = make_shared(20, 15); + auto taskVect = Task::createTasks(msg, sc, crm, newMySqlConfig(), sqlConnMgr); + Task::Ptr task = taskVect[0]; QueryRunner::Ptr a(QueryRunner::newQueryRunner(task, crm, newMySqlConfig(), sqlConnMgr)); BOOST_CHECK(a->runQuery()); diff --git a/src/wsched/testSchedulers.cc b/src/wsched/testSchedulers.cc index 7979cd7028..4d2d2073cd 100644 --- a/src/wsched/testSchedulers.cc +++ b/src/wsched/testSchedulers.cc @@ -32,6 +32,7 @@ // Qserv headers #include "memman/MemManNone.h" +#include "mysql/MySqlConfig.h" #include 
"proto/ScanTableInfo.h" #include "proto/worker.pb.h" #include "util/Command.h" @@ -39,6 +40,7 @@ #include "wbase/SendChannelShared.h" #include "wbase/Task.h" #include "wconfig/WorkerConfig.h" +#include "wcontrol/SqlConnMgr.h" #include "wcontrol/TransmitMgr.h" #include "wpublish/QueriesAndChunks.h" #include "wsched/ChunkTasksQueue.h" @@ -59,25 +61,32 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wsched.testSchedulers"); } using namespace std; +using lsst::qserv::mysql::MySqlConfig; using lsst::qserv::proto::TaskMsg; using lsst::qserv::wbase::SendChannel; using lsst::qserv::wbase::SendChannelShared; using lsst::qserv::wbase::Task; using lsst::qserv::wconfig::WorkerConfig; +using lsst::qserv::wcontrol::SqlConnMgr; +using lsst::qserv::wdb::ChunkResourceMgr; double const oneHr = 60.0; lsst::qserv::wcontrol::TransmitMgr::Ptr locTransmitMgr = std::make_shared(50, 4); +shared_ptr crm; // not used in this test, required by Task::createTasks +MySqlConfig mySqlConfig; // not used in this test, required by Task::createTasks +SqlConnMgr::Ptr sqlConnMgr; // not used in this test, required by Task::createTasks + std::vector locSendSharedPtrs; Task::Ptr makeTask(std::shared_ptr tm) { + WorkerConfig::create(); auto sendC = std::make_shared(); auto sc = SendChannelShared::create(sendC, locTransmitMgr, 1); locSendSharedPtrs.push_back(sc); - WorkerConfig::create(); - auto taskVect = Task::createTasks(tm, sc); + auto taskVect = Task::createTasks(tm, sc, crm, mySqlConfig, sqlConnMgr); Task::Ptr task = taskVect[0]; task->setSafeToMoveRunning(true); // Can't wait for MemMan in unit tests. 
return task; diff --git a/src/xrdsvc/SsiRequest.cc b/src/xrdsvc/SsiRequest.cc index 641897d6ad..c6fb005657 100644 --- a/src/xrdsvc/SsiRequest.cc +++ b/src/xrdsvc/SsiRequest.cc @@ -43,9 +43,10 @@ #include "util/InstanceCount.h" #include "util/HoldTrack.h" #include "util/Timer.h" -#include "wbase/MsgProcessor.h" #include "wbase/SendChannelShared.h" #include "wbase/TaskState.h" +#include "wbase/Task.h" +#include "wcontrol/Foreman.h" #include "wpublish/AddChunkGroupCommand.h" #include "wpublish/ChunkListCommand.h" #include "wpublish/GetChunkListCommand.h" @@ -82,7 +83,24 @@ wbase::TaskSelector proto2taskSelector(proto::WorkerCommandGetStatusM const& mes namespace lsst::qserv::xrdsvc { -std::shared_ptr SsiRequest::_resourceMonitor(new wpublish::ResourceMonitor()); +std::shared_ptr const SsiRequest::_resourceMonitor( + new wpublish::ResourceMonitor()); + +SsiRequest::Ptr SsiRequest::newSsiRequest(std::string const& rname, + std::shared_ptr const& chunkInventory, + std::shared_ptr const& foreman) { + auto req = SsiRequest::Ptr(new SsiRequest(rname, chunkInventory, foreman)); + req->_selfKeepAlive = req; + return req; +} + +SsiRequest::SsiRequest(std::string const& rname, + std::shared_ptr const& chunkInventory, + std::shared_ptr const& foreman) + : _chunkInventory(chunkInventory), + _validator(_chunkInventory->newValidator()), + _foreman(foreman), + _resourceName(rname) {} SsiRequest::~SsiRequest() { LOGS(_log, LOG_LVL_DEBUG, "~SsiRequest()"); @@ -128,6 +146,8 @@ void SsiRequest::execute(XrdSsiRequest& req) { return; } + auto const sendChannel = std::make_shared(shared_from_this()); + // Process the request switch (ru.unitType()) { case ResourceUnit::DBCHUNK: { @@ -163,24 +183,23 @@ void SsiRequest::execute(XrdSsiRequest& req) { " czarid:" + std::to_string(taskMsg->has_czarid())); return; } + auto const sendChannelShared = + wbase::SendChannelShared::create(sendChannel, _foreman->transmitMgr(), taskMsg->czarid()); + auto const tasks = + wbase::Task::createTasks(taskMsg, 
sendChannelShared, _foreman->chunkResourceMgr(), + _foreman->mySqlConfig(), _foreman->sqlConnMgr()); + for (auto const& task : tasks) { + _tasks.push_back(task); + } // Now that the request is decoded (successfully or not), release the // xrootd request buffer. To avoid data races, this must happen before // the task is handed off to another thread for processing, as there is a // reference to this SsiRequest inside the reply channel for the task, // and after the call to BindRequest. - auto sendChannelBase = std::make_shared(shared_from_this()); - auto sendChannel = - wbase::SendChannelShared::create(sendChannelBase, _transmitMgr, taskMsg->czarid()); - auto tasks = wbase::Task::createTasks(taskMsg, sendChannel); - - for (auto const& task : tasks) { - _tasks.push_back(task); - } - ReleaseRequestBuffer(); t.start(); - _processor->processTasks(tasks); // Queues tasks to be run later. + _foreman->processTasks(tasks); // Queues tasks to be run later. t.stop(); LOGS(_log, LOG_LVL_DEBUG, "Enqueued TaskMsg for " << ru << " in " << t.getElapsed() << " seconds"); @@ -189,13 +208,13 @@ void SsiRequest::execute(XrdSsiRequest& req) { case ResourceUnit::WORKER: { LOGS(_log, LOG_LVL_DEBUG, "Parsing WorkerCommand for resource=" << _resourceName); - wbase::WorkerCommand::Ptr const command = parseWorkerCommand(reqData, reqSize); + wbase::WorkerCommand::Ptr const command = parseWorkerCommand(sendChannel, reqData, reqSize); if (not command) return; // The buffer must be released before submitting commands for // further processing. ReleaseRequestBuffer(); - _processor->processCommand(command); // Queues the command to be run later. + _foreman->processCommand(command); // Queues the command to be run later. LOGS(_log, LOG_LVL_DEBUG, "Enqueued WorkerCommand for resource=" << _resourceName); ++countLimiter; @@ -238,11 +257,9 @@ void SsiRequest::execute(XrdSsiRequest& req) { // to actually do something once everything is actually setup. 
} -wbase::WorkerCommand::Ptr SsiRequest::parseWorkerCommand(char const* reqData, int reqSize) { - wbase::SendChannel::Ptr const sendChannel = std::make_shared(shared_from_this()); - +wbase::WorkerCommand::Ptr SsiRequest::parseWorkerCommand( + std::shared_ptr const& sendChannel, char const* reqData, int reqSize) { wbase::WorkerCommand::Ptr command; - try { // reqData has the entire request, so we can unpack it without waiting for // more data. @@ -275,11 +292,12 @@ wbase::WorkerCommand::Ptr SsiRequest::parseWorkerCommand(char const* reqData, in bool const force = group.force(); if (header.command() == proto::WorkerCommandH::ADD_CHUNK_GROUP) - command = std::make_shared(sendChannel, _chunkInventory, - _mySqlConfig, chunk, dbs); + command = std::make_shared( + sendChannel, _chunkInventory, _foreman->mySqlConfig(), chunk, dbs); else command = std::make_shared( - sendChannel, _chunkInventory, _resourceMonitor, _mySqlConfig, chunk, dbs, force); + sendChannel, _chunkInventory, _resourceMonitor, _foreman->mySqlConfig(), chunk, + dbs, force); break; } case proto::WorkerCommandH::UPDATE_CHUNK_LIST: { @@ -288,10 +306,10 @@ wbase::WorkerCommand::Ptr SsiRequest::parseWorkerCommand(char const* reqData, in if (message.rebuild()) command = std::make_shared( - sendChannel, _chunkInventory, _mySqlConfig, message.reload()); + sendChannel, _chunkInventory, _foreman->mySqlConfig(), message.reload()); else command = std::make_shared(sendChannel, _chunkInventory, - _mySqlConfig); + _foreman->mySqlConfig()); break; } case proto::WorkerCommandH::GET_CHUNK_LIST: { @@ -313,16 +331,16 @@ wbase::WorkerCommand::Ptr SsiRequest::parseWorkerCommand(char const* reqData, in databases.push_back(message.databases(i)); } bool const force = message.force(); - command = std::make_shared(sendChannel, _chunkInventory, - _resourceMonitor, _mySqlConfig, - chunks, databases, force); + command = std::make_shared( + sendChannel, _chunkInventory, _resourceMonitor, _foreman->mySqlConfig(), chunks, + databases, 
force); break; } case proto::WorkerCommandH::GET_STATUS: { proto::WorkerCommandGetStatusM message; view.parse(message); command = std::make_shared( - sendChannel, _processor, _resourceMonitor, ::proto2taskSelector(message)); + sendChannel, _foreman, _resourceMonitor, ::proto2taskSelector(message)); break; } default: diff --git a/src/xrdsvc/SsiRequest.h b/src/xrdsvc/SsiRequest.h index 0d4dc0cc5f..fafce6cc40 100644 --- a/src/xrdsvc/SsiRequest.h +++ b/src/xrdsvc/SsiRequest.h @@ -35,7 +35,6 @@ // Qserv headers #include "global/ResourceUnit.h" #include "mysql/MySqlConfig.h" -#include "wbase/Task.h" #include "wbase/WorkerCommand.h" #include "wpublish/ChunkInventory.h" #include "xrdsvc/StreamBuffer.h" @@ -45,11 +44,11 @@ class XrdSsiService; namespace lsst::qserv { namespace wbase { -struct MsgProcessor; +class SendChannel; class Task; } // namespace wbase namespace wcontrol { -class TransmitMgr; +class Foreman; } namespace wpublish { class ResourceMonitor; @@ -75,14 +74,7 @@ class SsiRequest : public XrdSsiResponder, public std::enable_shared_from_this const& chunkInventory, - std::shared_ptr const& processor, - mysql::MySqlConfig const& mySqlConfig, - std::shared_ptr const& transmitMgr) { - auto req = - SsiRequest::Ptr(new SsiRequest(rname, chunkInventory, processor, mySqlConfig, transmitMgr)); - req->_selfKeepAlive = req; - return req; - } + std::shared_ptr const& processor); virtual ~SsiRequest(); @@ -117,14 +109,7 @@ class SsiRequest : public XrdSsiResponder, public std::enable_shared_from_this const& chunkInventory, - std::shared_ptr const& processor, mysql::MySqlConfig const& mySqlConfig, - std::shared_ptr const& transmitMgr) - : _chunkInventory(chunkInventory), - _validator(_chunkInventory->newValidator()), - _processor(processor), - _resourceName(rname), - _mySqlConfig(mySqlConfig), - _transmitMgr(transmitMgr) {} + std::shared_ptr const& processor); /// For internal error reporting void reportError(std::string const& errStr); @@ -132,21 +117,23 @@ class 
SsiRequest : public XrdSsiResponder, public std::enable_shared_from_this const& sendChannel, + char const* reqData, int reqSize); private: /// Counters of the database/chunk requests which are being used - static std::shared_ptr _resourceMonitor; + static std::shared_ptr const _resourceMonitor; - std::shared_ptr _chunkInventory; + std::shared_ptr const _chunkInventory; - ValidatorPtr _validator; ///< validates request against what's available - std::shared_ptr _processor; ///< actual msg processor + ValidatorPtr _validator; ///< validates request against what's available + std::shared_ptr const _foreman; ///< actual msg processor std::mutex _finMutex; ///< Protects execute() from Finish(), _finished, and _stream std::atomic _reqFinished{false}; ///< set to true when Finished called @@ -156,9 +143,6 @@ class SsiRequest : public XrdSsiResponder, public std::enable_shared_from_this> _tasks; ///< List of tasks for use in cancellation. - mysql::MySqlConfig const _mySqlConfig; - std::shared_ptr _transmitMgr; ///< limits transmits to czars. - /// Make sure this object exists until Finish() is called. /// Make a local copy before calling reset() within and non-static member function. 
Ptr _selfKeepAlive; diff --git a/src/xrdsvc/SsiService.cc b/src/xrdsvc/SsiService.cc index 16f6ffcfae..d0a9308cd0 100644 --- a/src/xrdsvc/SsiService.cc +++ b/src/xrdsvc/SsiService.cc @@ -164,12 +164,12 @@ SsiService::SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerCon int const maxTransmits = workerConfig->getMaxTransmits(); int const maxPerQid = workerConfig->getMaxPerQid(); - _transmitMgr = make_shared(maxTransmits, maxPerQid); - LOGS(_log, LOG_LVL_WARN, "config transmitMgr" << *_transmitMgr); + auto const transmitMgr = make_shared(maxTransmits, maxPerQid); + LOGS(_log, LOG_LVL_WARN, "config transmitMgr" << *transmitMgr); LOGS(_log, LOG_LVL_WARN, "maxPoolThreads=" << maxPoolThreads); - _foreman = make_shared(blendSched, poolSize, maxPoolThreads, - _mySqlConfig, queries, sqlConnMgr); + _foreman = make_shared(blendSched, poolSize, maxPoolThreads, _mySqlConfig, queries, + sqlConnMgr, transmitMgr, workerConfig); // Watch to see if the log configuration is changed. // If LSST_LOG_CONFIG is not defined, there's no good way to know what log @@ -188,8 +188,7 @@ SsiService::~SsiService() { LOGS(_log, LOG_LVL_DEBUG, "SsiService dying."); } void SsiService::ProcessRequest(XrdSsiRequest& reqRef, XrdSsiResource& resRef) { LOGS(_log, LOG_LVL_DEBUG, "Got request call where rName is: " << resRef.rName); - auto request = - SsiRequest::newSsiRequest(resRef.rName, _chunkInventory, _foreman, _mySqlConfig, _transmitMgr); + auto request = SsiRequest::newSsiRequest(resRef.rName, _chunkInventory, _foreman); // Continue execution in the session object as SSI gave us a new thread. // Object deletes itself when finished is called. 
diff --git a/src/xrdsvc/SsiService.h b/src/xrdsvc/SsiService.h index aa54c18245..8b1568f0db 100644 --- a/src/xrdsvc/SsiService.h +++ b/src/xrdsvc/SsiService.h @@ -43,7 +43,6 @@ class FileMonitor; } namespace wcontrol { class Foreman; -class TransmitMgr; } // namespace wcontrol namespace wpublish { class ChunkInventory; @@ -56,11 +55,10 @@ namespace lsst::qserv::xrdsvc { /// worker services class SsiService : public XrdSsiService { public: - /** - * Build a SsiService object + /** Build a SsiService object * @param log xrdssi logger + * @note take ownership of logger for now */ - // take ownership of logger for now SsiService(XrdSsiLogger* log); virtual ~SsiService(); @@ -78,9 +76,6 @@ class SsiService : public XrdSsiService { // The Foreman contains essential structures for adding and running tasks. std::shared_ptr _foreman; - /// Used to throttle outgoing massages to prevent czars from being overloaded. - std::shared_ptr _transmitMgr; - mysql::MySqlConfig const _mySqlConfig; /// Reloads the log configuration file on log config file change. From 526eaa15dbddcd8f9db7b02b03eaf1fa4dda9ed3 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Wed, 8 Mar 2023 02:54:55 +0000 Subject: [PATCH 5/8] Implemented writing and serving result files by workers The new result writer redirects result sets into files at workers. Only the last "summary" message with no rows is sent to Czar for each worker query (regardless of the number of tasks that were required to process the query). Also did the minor refactoring in the error messages Added HTTP server to Qserv worker for serving result files Also made minor fixes and improvements to basic tests of QHTTP made in the Replication/Ingest system code base. Using a configuration option to select the desired result delivery protocol Added support (if configured) for deleting unclaimed result files at the startup time of the worker service and after restarting the Czar. 
--- src/replica/QhttpTestApp.cc | 7 +- src/wbase/CMakeLists.txt | 1 + src/wbase/ChannelShared.h | 4 +- src/wbase/FileChannelShared.cc | 303 +++++++++++++++++++++++++++++++++ src/wbase/FileChannelShared.h | 160 +++++++++++++++++ src/wbase/Task.cc | 87 +++++++--- src/wbase/Task.h | 43 +++-- src/wbase/TransmitData.cc | 24 ++- src/wbase/TransmitData.h | 13 ++ src/wcontrol/CMakeLists.txt | 1 + src/wcontrol/Foreman.cc | 72 +++++++- src/wcontrol/Foreman.h | 34 ++-- src/xrdsvc/SsiRequest.cc | 59 ++++++- src/xrdsvc/SsiService.cc | 12 +- 14 files changed, 745 insertions(+), 75 deletions(-) create mode 100644 src/wbase/FileChannelShared.cc create mode 100644 src/wbase/FileChannelShared.h diff --git a/src/replica/QhttpTestApp.cc b/src/replica/QhttpTestApp.cc index 62535fed5a..0e5688a3f4 100644 --- a/src/replica/QhttpTestApp.cc +++ b/src/replica/QhttpTestApp.cc @@ -126,7 +126,7 @@ int QhttpTestApp::runImpl() { boost::asio::io_service io_service; qhttp::Server::Ptr const httpServer = qhttp::Server::create(io_service, _port, _backlog); - httpServer->addHandlers({{"GET", "/service/receive", + httpServer->addHandlers({{"POST", "/service/receive", [&](qhttp::Request::Ptr const& req, qhttp::Response::Ptr const& resp) { ++numRequests; if (_verbose) @@ -136,7 +136,7 @@ int QhttpTestApp::runImpl() { json const reply({{"success", 1}}); resp->send(reply.dump(), "application/json"); }}, - {"GET", "/service/echo", + {"POST", "/service/echo", [&](qhttp::Request::Ptr const& req, qhttp::Response::Ptr const& resp) { ++numRequests; if (_verbose) @@ -155,8 +155,6 @@ int QhttpTestApp::runImpl() { if (_verbose) cout << ::timestamp() << "Request: " << ::senderIpAddr(req) << " /service/random" << endl; - uint64_t const numBytes = readBody(req); - numBytesReceived += numBytes; uint64_t const numBytesRandom = distr(gen); numBytesSent += numBytesRandom; string const data(numBytesRandom, 'x'); @@ -169,7 +167,6 @@ int QhttpTestApp::runImpl() { if (_verbose) cout << ::timestamp() << "Request: " << 
::senderIpAddr(req) << " /management/stop" << endl; - numBytesReceived += readBody(req); json const reply({{"success", 1}}); resp->send(reply.dump(), "application/json"); httpServer->stop(); diff --git a/src/wbase/CMakeLists.txt b/src/wbase/CMakeLists.txt index 01b051a2c9..441f911478 100644 --- a/src/wbase/CMakeLists.txt +++ b/src/wbase/CMakeLists.txt @@ -4,6 +4,7 @@ add_dependencies(wbase proto) target_sources(wbase PRIVATE Base.cc ChannelShared.cc + FileChannelShared.cc SendChannel.cc SendChannelShared.cc Task.cc diff --git a/src/wbase/ChannelShared.h b/src/wbase/ChannelShared.h index 81c17483e7..a8d4efc217 100644 --- a/src/wbase/ChannelShared.h +++ b/src/wbase/ChannelShared.h @@ -148,8 +148,8 @@ class ChannelShared { /// Prepare the transmit data and then call addTransmit. /// @param tMtxLock - Lock on mutex tMtx to be acquired before calling the method. - virtual bool prepTransmit(std::lock_guard const& tMtxLock, std::shared_ptr const& task, - bool cancelled, bool lastIn); + bool prepTransmit(std::lock_guard const& tMtxLock, std::shared_ptr const& task, + bool cancelled, bool lastIn); /// Try to transmit the data in tData. /// If the queue already has at least 2 TransmitData objects, addTransmit diff --git a/src/wbase/FileChannelShared.cc b/src/wbase/FileChannelShared.cc new file mode 100644 index 0000000000..c3715362a1 --- /dev/null +++ b/src/wbase/FileChannelShared.cc @@ -0,0 +1,303 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ + +// Class header +#include "wbase/FileChannelShared.h" + +// System headers +#include +#include + +// Third party headers +#include "boost/filesystem.hpp" +#include "boost/range/iterator_range.hpp" + +// Qserv headers +#include "proto/ProtoHeaderWrap.h" +#include "proto/worker.pb.h" +#include "wbase/Task.h" +#include "wconfig/WorkerConfig.h" +#include "wpublish/QueriesAndChunks.h" +#include "util/MultiError.h" +#include "util/Timer.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +namespace fs = boost::filesystem; +namespace wconfig = lsst::qserv::wconfig; + +namespace { + +LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.FileChannelShared"); + +/** + * Iterate over the result files at the results folder and remove those + * which satisfy the desired criteria. + * @param context The calling context (used for logging purposes). + * @param fileCanBeRemoved The optional validator to be called for each candidate file. + * Note that missing validator means "yes" the candidate file can be removed. + * @return The total number of removed files. 
+ */ +size_t cleanUpResultsImpl(string const& context, fs::path const& dirPath, + function fileCanBeRemoved = nullptr) { + size_t numFilesRemoved = 0; + string const ext = ".proto"; + boost::system::error_code ec; + auto itr = fs::directory_iterator(dirPath, ec); + if (ec.value() != 0) { + LOGS(_log, LOG_LVL_WARN, + context << "failed to open the results folder " << dirPath << ", ec: " << ec << "."); + return numFilesRemoved; + } + for (auto&& entry : boost::make_iterator_range(itr, {})) { + auto filePath = entry.path(); + bool const removeIsCleared = + filePath.has_filename() && filePath.has_extension() && (filePath.extension() == ext) && + ((fileCanBeRemoved == nullptr) || fileCanBeRemoved(filePath.filename().string())); + if (removeIsCleared) { + fs::remove_all(filePath, ec); + if (ec.value() != 0) { + LOGS(_log, LOG_LVL_WARN, + context << "failed to remove result file " << filePath << ", ec: " << ec << "."); + } else { + LOGS(_log, LOG_LVL_INFO, context << "removed result file " << filePath << "."); + ++numFilesRemoved; + } + } + } + return numFilesRemoved; +} + +} // namespace + +namespace lsst::qserv::wbase { + +mutex FileChannelShared::_resultsDirCleanupMtx; + +void FileChannelShared::cleanUpResultsOnCzarRestart(QueryId queryId) { + string const context = "FileChannelShared::" + string(__func__) + " "; + fs::path const dirPath = wconfig::WorkerConfig::instance()->resultsDirname(); + LOGS(_log, LOG_LVL_INFO, + context << "removing result files from " << dirPath << " for queryId=" << queryId << " or older."); + lock_guard const lock(_resultsDirCleanupMtx); + size_t const numFilesRemoved = + ::cleanUpResultsImpl(context, dirPath, [queryId, &context](string const& fileName) -> bool { + try { + // Names of the result files begin with identifiers of the corresponding queries: + // '-...' 
+ auto const pos = fileName.find_first_of('-'); + return (pos != string::npos) && (pos != 0) && + (stoull(fileName.substr(0, pos)) <= queryId); + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, + context << "failed to locate queryId in the file name " << fileName + << ", ex: " << ex.what()); + return false; + } + }); + LOGS(_log, LOG_LVL_INFO, + context << "removed " << numFilesRemoved << " result files from " << dirPath << "."); +} + +void FileChannelShared::cleanUpResultsOnWorkerRestart() { + string const context = "FileChannelShared::" + string(__func__) + " "; + fs::path const dirPath = wconfig::WorkerConfig::instance()->resultsDirname(); + LOGS(_log, LOG_LVL_INFO, context << "removing all result files from " << dirPath << "."); + lock_guard const lock(_resultsDirCleanupMtx); + size_t const numFilesRemoved = ::cleanUpResultsImpl(context, dirPath); + LOGS(_log, LOG_LVL_INFO, + context << "removed " << numFilesRemoved << " result files from " << dirPath << "."); +} + +void FileChannelShared::cleanUpResults(QueryId queryId) { + string const context = "FileChannelShared::" + string(__func__) + " "; + fs::path const dirPath = wconfig::WorkerConfig::instance()->resultsDirname(); + string const queryIdPrefix = to_string(queryId) + "-"; + LOGS(_log, LOG_LVL_INFO, + context << "removing result files from " << dirPath << " for queryId=" << queryId << "."); + lock_guard const lock(_resultsDirCleanupMtx); + size_t const numFilesRemoved = + ::cleanUpResultsImpl(context, dirPath, [&queryIdPrefix](string const& fileName) -> bool { + // Names of the result files begin with identifiers of the corresponding queries: + // '-...' 
+ return fileName.substr(0, queryIdPrefix.size()) == queryIdPrefix; + }); + LOGS(_log, LOG_LVL_INFO, + context << "removed " << numFilesRemoved << " result files from " << dirPath << "."); +} + +FileChannelShared::Ptr FileChannelShared::create(shared_ptr const& sendChannel, + shared_ptr const& transmitMgr, + shared_ptr const& taskMsg) { + lock_guard const lock(_resultsDirCleanupMtx); + return shared_ptr(new FileChannelShared(sendChannel, transmitMgr, taskMsg)); +} + +FileChannelShared::FileChannelShared(shared_ptr const& sendChannel, + shared_ptr const& transmitMgr, + shared_ptr const& taskMsg) + : ChannelShared(sendChannel, transmitMgr, taskMsg->czarid()) {} + +FileChannelShared::~FileChannelShared() { + // Normally, the channel should not be dead before the base class's d-tor + // gets called. If it's already dead it means there was a problem to process + // a query or send back a response to Czar. In either case, the file + // would be useless and it has to be deleted to avoid leaving unclaimed + // result files at the results folder. + if (isDead()) { + _removeFile(lock_guard(tMtx)); + } +} + +bool FileChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, shared_ptr const& task, + util::MultiError& multiErr, atomic& cancelled) { + // Operation stats. Note that "buffer fill time" included the amount + // of time needed to write the result set to disk. + util::Timer transmitT; + transmitT.start(); + + double bufferFillSecs = 0.0; + int bytesTransmitted = 0; + int rowsTransmitted = 0; + + // Keep reading rows and converting those into messages while any + // are still left in the result set. The row processing method + // will write rows into the output file. The final "summary" message + // will be sant back to Czar after processing the very last set of rows + // of the last task of a request. + bool erred = false; + bool hasMoreRows = true; + + // This lock is to protect transmitData from having other Tasks mess with it + // while data is loading. 
+ lock_guard const tMtxLock(tMtx); + + while (hasMoreRows && !cancelled) { + util::Timer bufferFillT; + bufferFillT.start(); + + // Initialize transmitData, if needed. + initTransmit(tMtxLock, *task); + + // Transfer rows from a result set into the data buffer. Note that tSize + // is set by fillRows. A value of this variable is presently not used by + // the code. + size_t tSize = 0; + hasMoreRows = !transmitData->fillRows(mResult, tSize); + + // Serialize the content of the data buffer into the Protobuf data message + // that will be writen into the output file. + transmitData->buildDataMsg(*task, multiErr); + _writeToFile(tMtxLock, task, transmitData->dataMsg()); + + bufferFillT.stop(); + bufferFillSecs += bufferFillT.getElapsed(); + + int const bytes = transmitData->getResultSize(); + int const rows = transmitData->getResultRowCount(); + bytesTransmitted += bytes; + rowsTransmitted += rows; + _rowcount += rows; + _transmitsize += bytes; + + // If no more rows are left in the task's result set then we need to check + // if this is last task in a logical group of ones created for processing + // the current request (note that certain classes of requests may require + // more than one task for processing). + if (!hasMoreRows && transmitTaskLast()) { + // Make sure the file is sync to disk before notifying Czar. + _file.flush(); + _file.close(); + + // Only the last ("summary") message w/o any rows is sent to Czar to notify + // the about completion of the request. + transmitData->prepareResponse(*task, _rowcount, _transmitsize); + bool const lastIn = true; + if (!prepTransmit(tMtxLock, task, cancelled, lastIn)) { + LOGS(_log, LOG_LVL_ERROR, "Could not transmit the summary message to Czar."); + erred = true; + break; + } + } else { + // Scrap the transmit buffer to be ready for processing the next set of rows + // of the current or the next task of the request. 
+ transmitData.reset(); + } + } + transmitT.stop(); + double timeSeconds = transmitT.getElapsed(); + auto qStats = task->getQueryStats(); + if (qStats == nullptr) { + LOGS(_log, LOG_LVL_ERROR, "No statistics for " << task->getIdStr()); + } else { + qStats->addTaskTransmit(timeSeconds, bytesTransmitted, rowsTransmitted, bufferFillSecs); + LOGS(_log, LOG_LVL_TRACE, + "TaskTransmit time=" << timeSeconds << " bufferFillSecs=" << bufferFillSecs); + } + + // No reason to keep the file after a failure (hit while processing a query, + // extracting a result set into the file) or query cancellation. This also + // includes problems encountered while sending a response back to Czar after + // sucesufully processing the query and writing all results into the file. + // The file is not going to be used by Czar in either of these scenarios. + if (cancelled || erred || isDead()) { + _removeFile(tMtxLock); + } + return erred; +} + +void FileChannelShared::_writeToFile(lock_guard const& tMtxLock, shared_ptr const& task, + string const& msg) { + if (!_file.is_open()) { + _fileName = task->resultFilePath(); + _file.open(_fileName, ios::out | ios::trunc | ios::binary); + if (!(_file.is_open() && _file.good())) { + throw runtime_error("FileChannelShared::" + string(__func__) + + " failed to create/truncate the file '" + _fileName + "'."); + } + } + // Write 32-bit length of the subsequent message first before writing + // the message itself. 
+ uint32_t const msgSizeBytes = msg.size(); + _file.write(reinterpret_cast(&msgSizeBytes), sizeof msgSizeBytes); + _file.write(msg.data(), msgSizeBytes); + if (!(_file.is_open() && _file.good())) { + throw runtime_error("FileChannelShared::" + string(__func__) + " failed to write " + + to_string(msg.size()) + " bytes into the file '" + _fileName + "'."); + } +} + +void FileChannelShared::_removeFile(lock_guard const& tMtxLock) { + if (!_fileName.empty() && _file.is_open()) { + _file.close(); + boost::system::error_code ec; + fs::remove_all(fs::path(_fileName), ec); + if (ec.value() != 0) { + LOGS(_log, LOG_LVL_WARN, + "FileChannelShared::" << __func__ << " failed to remove the result file '" << _fileName + << "', ec: " << ec << "."); + } + } +} + +} // namespace lsst::qserv::wbase diff --git a/src/wbase/FileChannelShared.h b/src/wbase/FileChannelShared.h new file mode 100644 index 0000000000..a19acb26e2 --- /dev/null +++ b/src/wbase/FileChannelShared.h @@ -0,0 +1,160 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +#ifndef LSST_QSERV_WBASE_FILECHANNELSHARED_H +#define LSST_QSERV_WBASE_FILECHANNELSHARED_H + +// System headers +#include +#include +#include +#include +#include + +// Third-party headers +#include + +// Qserv headers +#include "global/intTypes.h" +#include "qmeta/types.h" +#include "wbase/ChannelShared.h" + +namespace lsst::qserv::proto { +class TaskMsg; +} + +namespace lsst::qserv::wbase { +class SendChannel; +class Task; +} // namespace lsst::qserv::wbase + +namespace lsst::qserv::wcontrol { +class TransmitMgr; +} + +namespace lsst::qserv::util { +class MultiError; +} + +namespace lsst::qserv::wbase { + +/// The class is responsible for writing mysql result rows as Protobuf +/// serialized messages into an output file. Once a task (or all sub-chunk +/// tasks) finished writing data a short reply message is sent back to Czar using +/// SSI request's SendChannel that was provided to the factory method +/// of the class. Error messages would be also sent via te same channel. +/// A partially written file will be automatically deleted in case of +/// errors. +/// +/// When building messages for result rows, multiple tasks may add to the +/// the output file before it gets closed and a reply is transmitted to the czar. +/// All the tasks adding rows to the TransmitData object must be operating on +/// the same chunk. This only happens for near-neighbor queries, which +/// have one task per subchunk. +class FileChannelShared : public ChannelShared { +public: + using Ptr = std::shared_ptr; + + /** + * This method gets called upon receiving a notification from Czar about + * the Czar service restart. The method will clean result files corresponding + * to the older (including the specified one) queries. + * @note The method may be called 0 or many times during the lifetime of the worker service. + * @param queryId The most recent user query registered before restart. 
+ */ + static void cleanUpResultsOnCzarRestart(QueryId queryId); + + /** + * This method gets called exactly one time during the initial startup + * initialization of the worker service. + */ + static void cleanUpResultsOnWorkerRestart(); + + /** + * Clean up result files of the specified query. + * @param queryId The most recent user query registered before restart. + */ + static void cleanUpResults(QueryId queryId); + + /// The factory method for the channel class. + static Ptr create(std::shared_ptr const& sendChannel, + std::shared_ptr const& transmitMgr, + std::shared_ptr const& taskMsg); + + FileChannelShared() = delete; + FileChannelShared(FileChannelShared const&) = delete; + FileChannelShared& operator=(FileChannelShared const&) = delete; + + // Non-trivial d-tor is needed to garbage collect the file after failures. + virtual ~FileChannelShared() override; + + /// @see ChannelShared::buildAndTransmitResult() + virtual bool buildAndTransmitResult(MYSQL_RES* mResult, std::shared_ptr const& task, + util::MultiError& multiErr, std::atomic& cancelled) override; + +private: + /// Private constructor to protect shared pointer integrity. + FileChannelShared(std::shared_ptr const& sendChannel, + std::shared_ptr const& transmitMgr, + std::shared_ptr const& taskMsg); + + /** + * Write a message into the output file. The file will be created at the first call + * to the method. + * @param tMtxLock - a lock on the base class's mutex tMtx + * @param task - a task that produced the result set + * @param msg - data to be written + * @throws std::runtime_error for problems encountered when attemting to create the file + * or write into the file. + */ + void _writeToFile(std::lock_guard const& tMtxLock, std::shared_ptr const& task, + std::string const& msg); + + /** + * Unconditionaly close and remove (potentially - the partially written) file. 
+ * This method gets called in case of any failure detected while processing + * a query, sending a response back to Czar, or in case of a query cancellation. + * @note For successfully completed requests the files are deleted remotely + * upon special requests made explicitly by Czar after uploading and consuming + * result sets. Unclaimed files that might be still remaining at the results + * folder would need to be garbage collected at the startup time of the worker. + * @param tMtxLock - a lock on the base class's mutex tMtx + */ + void _removeFile(std::lock_guard const& tMtxLock); + + /// The mutex is locked by the following static methods which require exclusive + /// access to the results folder: create(), cleanUpResultsOnCzarRestart(), + /// cleanUpResultsOnWorkerRestart(), and cleanUpResults(). + static std::mutex _resultsDirCleanupMtx; + + std::string _fileName; ///< The name is set when opening the file + std::fstream _file; + + // Counters reported to Czar in the only ("summary") message sent upon the completion + // of all tasks of a query. + + uint32_t _rowcount = 0; ///< The total number of rows in all result sets of a query. + uint64_t _transmitsize = 0; ///< The total amount of data (bytes) in all result sets of a query. 
+}; + +} // namespace lsst::qserv::wbase + +#endif // LSST_QSERV_WBASE_FILECHANNELSHARED_H diff --git a/src/wbase/Task.cc b/src/wbase/Task.cc index 3fa7b1c63d..1871c67b2d 100644 --- a/src/wbase/Task.cc +++ b/src/wbase/Task.cc @@ -33,9 +33,12 @@ // System headers #include +#include // Third-party headers #include +#include "boost/asio.hpp" +#include "boost/filesystem.hpp" // LSST headers #include "lsst/log/Log.h" @@ -54,15 +57,38 @@ #include "wbase/Base.h" #include "wbase/ChannelShared.h" #include "wbase/UserQueryInfo.h" +#include "wconfig/WorkerConfig.h" +#include "wdb/QueryRunner.h" #include "wpublish/QueriesAndChunks.h" using namespace std; using namespace std::chrono_literals; +namespace fs = boost::filesystem; namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.Task"); +string get_hostname() { + // Get the short name of the current host. + boost::system::error_code ec; + string const hostname = boost::asio::ip::host_name(ec); + if (ec.value() != 0) { + throw runtime_error("Task::" + string(__func__) + + " boost::asio::ip::host_name failed: " + ec.category().name() + string(":") + + to_string(ec.value()) + "[" + ec.message() + "]"); + } + return hostname; +} + +string buildResultFilePath(shared_ptr const& taskMsg, + string const& resultsDirname) { + if (resultsDirname.empty()) return resultsDirname; + fs::path path(resultsDirname); + path /= to_string(taskMsg->queryid()) + "-" + to_string(taskMsg->jobid()) + "-" + + to_string(taskMsg->chunkid()) + "-" + to_string(taskMsg->attemptcount()) + ".proto"; + return path.string(); +} } // namespace namespace lsst::qserv::wbase { @@ -101,7 +127,8 @@ std::atomic taskSequence{0}; /// Command::setFunc() is used set the action later. This is why /// the util::CommandThreadPool is not called here. 
Task::Task(TaskMsgPtr const& t, int fragmentNumber, std::shared_ptr const& userQueryInfo, - size_t templateId, int subchunkId, std::shared_ptr const& sc) + size_t templateId, int subchunkId, std::shared_ptr const& sc, + uint16_t resultsHttpPort) : _userQueryInfo(userQueryInfo), _sendChannel(sc), _tSeq(++taskSequence), @@ -119,6 +146,25 @@ Task::Task(TaskMsgPtr const& t, int fragmentNumber, std::shared_ptrhas_db() ? t->db() : ""), _protocol(t->has_protocol() ? t->protocol() : -1), _czarId(t->has_czarid() ? t->czarid() : -1) { + // These attributes will be passed back to Czar in the Protobuf response + // to advice which result delivery channel to use. + auto const workerConfig = wconfig::WorkerConfig::instance(); + auto const resultDeliveryProtocol = workerConfig->resultDeliveryProtocol(); + if (resultDeliveryProtocol != wconfig::WorkerConfig::ResultDeliveryProtocol::SSI) { + _resultFilePath = ::buildResultFilePath(t, workerConfig->resultsDirname()); + if (resultDeliveryProtocol == wconfig::WorkerConfig::ResultDeliveryProtocol::XROOT) { + // NOTE: one extra '/' after the [:] spec is required to make + // a "valid" XROOTD url. 
+ _resultFileXrootUrl = "xroot://" + ::get_hostname() + ":" + + to_string(workerConfig->resultsXrootdPort()) + "/" + _resultFilePath; + } else if (resultDeliveryProtocol == wconfig::WorkerConfig::ResultDeliveryProtocol::HTTP) { + _resultFileHttpUrl = + "http://" + ::get_hostname() + ":" + to_string(resultsHttpPort) + _resultFilePath; + } else { + throw std::runtime_error("wbase::Task::Task: unsupported results delivery protocol: " + + wconfig::WorkerConfig::protocol2str(resultDeliveryProtocol)); + } + } if (t->has_user()) { user = t->user(); } else { @@ -194,7 +240,8 @@ std::vector Task::createTasks(std::shared_ptr const& std::shared_ptr const& sendChannel, std::shared_ptr const& chunkResourceMgr, mysql::MySqlConfig const& mySqlConfig, - std::shared_ptr const& sqlConnMgr) { + std::shared_ptr const& sqlConnMgr, + uint16_t resultsHttpPort) { QueryId qId = taskMsg->queryid(); QSERV_LOGCONTEXT_QUERY_JOB(qId, taskMsg->jobid()); std::vector vect; @@ -215,13 +262,13 @@ std::vector Task::createTasks(std::shared_ptr const& if (fragment.has_subchunks() && not fragment.subchunks().id().empty()) { for (auto subchunkId : fragment.subchunks().id()) { auto task = std::make_shared(taskMsg, fragNum, userQueryInfo, templateId, - subchunkId, sendChannel); + subchunkId, sendChannel, resultsHttpPort); vect.push_back(task); } } else { int subchunkId = -1; // there are no subchunks. auto task = std::make_shared(taskMsg, fragNum, userQueryInfo, templateId, - subchunkId, sendChannel); + subchunkId, sendChannel, resultsHttpPort); vect.push_back(task); } } @@ -229,27 +276,17 @@ std::vector Task::createTasks(std::shared_ptr const& for (auto task : vect) { /// Set the function called when it is time to process the task. 
auto func = [task, chunkResourceMgr, mySqlConfig, sqlConnMgr](util::CmdData*) { - proto::TaskMsg const& msg = *task->msg; - int const resultProtocol = 2; // See proto/worker.proto Result protocol - if (!msg.has_protocol() || msg.protocol() < resultProtocol) { - LOGS(_log, LOG_LVL_WARN, "processMsg Unsupported wire protocol"); - if (!task->checkCancelled()) { - // We should not send anything back to xrootd if the task has been cancelled. - task->getSendChannel()->sendError("Unsupported wire protocol", 1); - } - } else { - auto qr = wdb::QueryRunner::newQueryRunner(task, chunkResourceMgr, mySqlConfig, sqlConnMgr); - bool success = false; - try { - success = qr->runQuery(); - } catch (UnsupportedError const& e) { - LOGS(_log, LOG_LVL_ERROR, "runQuery threw UnsupportedError " << e.what() << *task); - } - if (not success) { - LOGS(_log, LOG_LVL_ERROR, "runQuery failed " << *task); - if (not task->getSendChannel()->kill("Foreman::_setRunFunc")) { - LOGS(_log, LOG_LVL_WARN, "runQuery sendChannel killed"); - } + auto qr = wdb::QueryRunner::newQueryRunner(task, chunkResourceMgr, mySqlConfig, sqlConnMgr); + bool success = false; + try { + success = qr->runQuery(); + } catch (UnsupportedError const& e) { + LOGS(_log, LOG_LVL_ERROR, "runQuery threw UnsupportedError " << e.what() << *task); + } + if (not success) { + LOGS(_log, LOG_LVL_ERROR, "runQuery failed " << *task); + if (not task->getSendChannel()->kill("Foreman::_setRunFunc")) { + LOGS(_log, LOG_LVL_WARN, "runQuery sendChannel killed"); } } // Transmission is done, but 'task' contains statistics that are still useful. 
diff --git a/src/wbase/Task.h b/src/wbase/Task.h index a8e575b6d9..eba22bcdd2 100644 --- a/src/wbase/Task.h +++ b/src/wbase/Task.h @@ -48,27 +48,25 @@ #include "util/ThreadPool.h" // Forward declarations -namespace lsst::qserv { -namespace mysql { +namespace lsst::qserv::mysql { class MySqlConfig; -} // namespace mysql -namespace proto { +} +namespace lsst::qserv::proto { class TaskMsg; class TaskMsg_Fragment; -} // namespace proto -namespace wbase { +} // namespace lsst::qserv::proto +namespace lsst::qserv::wbase { class ChannelShared; -} // namespace wbase -namespace wcontrol { +} +namespace lsst::qserv::wcontrol { class SqlConnMgr; -} // namespace wcontrol -namespace wdb { +} +namespace lsst::qserv::wdb { class ChunkResourceMgr; -} // namespace wdb -namespace wpublish { +} +namespace lsst::qserv::wpublish { class QueryStatistics; } -} // namespace lsst::qserv namespace lsst::qserv::wbase { @@ -157,7 +155,8 @@ class Task : public util::CommandForThreadPool { }; Task(TaskMsgPtr const& t, int fragmentNumber, std::shared_ptr const& userQueryInfo, - size_t templateId, int subchunkId, std::shared_ptr const& sc); + size_t templateId, int subchunkId, std::shared_ptr const& sc, + uint16_t resultsHttpPort = 8080); Task& operator=(const Task&) = delete; Task(const Task&) = delete; virtual ~Task(); @@ -167,7 +166,8 @@ class Task : public util::CommandForThreadPool { std::shared_ptr const& sendChannel, std::shared_ptr const& chunkResourceMgr, mysql::MySqlConfig const& mySqlConfig, - std::shared_ptr const& sqlConnMgr); + std::shared_ptr const& sqlConnMgr, + uint16_t resultsHttpPort = 8080); void setQueryStatistics(std::shared_ptr const& qC); @@ -199,6 +199,9 @@ class Task : public util::CommandForThreadPool { TaskState state() const { return _state; } std::string getQueryString() const; int getQueryFragmentNum() { return _queryFragmentNum; } + std::string const& resultFilePath() const { return _resultFilePath; } + std::string const& resultFileXrootUrl() const { return 
_resultFileXrootUrl; } + std::string const& resultFileHttpUrl() const { return _resultFileHttpUrl; } bool setTaskQueryRunner( TaskQueryRunner::Ptr const& taskQueryRunner); ///< return true if already cancelled. void freeTaskQueryRunner(TaskQueryRunner* tqr); @@ -301,6 +304,16 @@ class Task : public util::CommandForThreadPool { /// Set of tables and vector of subchunk ids used by ChunkResourceRequest. Do not change/reset. std::unique_ptr _dbTblsAndSubchunks; + /// The path to the result file. + std::string _resultFilePath; + + /// The XROOTD URL for the result file: "xroot://:" + "/" + _resultFilePath + /// @note an extra '/' after server:port spec is required to make a "valid" XROOTD url + std::string _resultFileXrootUrl; + + /// The HTTP URL for the result file: "http://:" + _resultFilePath + std::string _resultFileHttpUrl; + std::atomic _cancelled{false}; std::atomic _safeToMoveRunning{false}; ///< false until done with waitForMemMan(). TaskQueryRunner::Ptr _taskQueryRunner; diff --git a/src/wbase/TransmitData.cc b/src/wbase/TransmitData.cc index bf78082744..4cbe64f922 100644 --- a/src/wbase/TransmitData.cc +++ b/src/wbase/TransmitData.cc @@ -154,8 +154,13 @@ void TransmitData::_buildHeader(lock_guard const& lock) { } void TransmitData::buildDataMsg(Task const& task, util::MultiError& multiErr) { - QSERV_LOGCONTEXT_QUERY_JOB(task.getQueryId(), task.getJobId()); lock_guard const lock(_trMtx); + _buildDataMsg(lock, task, multiErr); +} + +void TransmitData::_buildDataMsg(lock_guard const& lock, Task const& task, + util::MultiError& multiErr) { + QSERV_LOGCONTEXT_QUERY_JOB(task.getQueryId(), task.getJobId()); LOGS(_log, LOG_LVL_INFO, _idStr << "TransmitData::_buildDataMsg rowCount=" << _rowCount << " tSize=" << _tSize); assert(_result != nullptr); @@ -184,6 +189,8 @@ void TransmitData::initResult(Task& task) { if (task.getSession() >= 0) { _result->set_session(task.getSession()); } + _result->set_fileresource_xroot(task.resultFileXrootUrl()); + 
_result->set_fileresource_http(task.resultFileHttpUrl()); } bool TransmitData::hasErrormsg() const { return _result->has_errormsg(); } @@ -220,6 +227,21 @@ bool TransmitData::fillRows(MYSQL_RES* mResult, size_t& sz) { return true; } +void TransmitData::prepareResponse(Task const& task, uint32_t rowcount, uint64_t transmitsize) { + lock_guard const lock(_trMtx); + _rowCount = rowcount; + _tSize = transmitsize; + _result->clear_row(); + // Rebuild the message + util::MultiError multiErr; + _buildDataMsg(lock, task, multiErr); +} + +size_t TransmitData::getResultTransmitSize() const { + lock_guard const lock(_trMtx); + return _tSize; +} + int TransmitData::getResultSize() const { lock_guard const lock(_trMtx); return _dataMsg.size(); diff --git a/src/wbase/TransmitData.h b/src/wbase/TransmitData.h index 7f0e438d10..b332b24d80 100644 --- a/src/wbase/TransmitData.h +++ b/src/wbase/TransmitData.h @@ -99,12 +99,22 @@ class TransmitData { /// true if there are no more rows remaining in mResult. bool fillRows(MYSQL_RES* mResult, size_t& sz); + /// Prepare the summary response by emptying the payload (rows) and setting + /// the counters. + /// @param task - the task responsible for the change + /// @param rowcount - the total number of rows in a result set of a query + /// @param transmitsize - teh total size (bytes) of a result set of a query + void prepareResponse(Task const& task, uint32_t rowcount, uint64_t transmitsize); + /// Use the information collected in _result and multiErr to build _dataMsg. void buildDataMsg(Task const& task, util::MultiError& multiErr); /// @return true if tData has an error message in _result. bool hasErrormsg() const; + /// @return the size of the result (the 'transmitsize' of the result) in bytes. + size_t getResultTransmitSize() const; + /// @return the size of the result in bytes. 
int getResultSize() const; @@ -121,6 +131,9 @@ class TransmitData { TransmitData(qmeta::CzarId const& czarId, std::shared_ptr const& arena, std::string const& idStr); + /// @see TrasnmitData::buildDataMsg + void _buildDataMsg(std::lock_guard const& lock, Task const& task, util::MultiError& multiErr); + /// Create a header for an empty result using our arena. /// This does not set the 'header' member of this object as there is a /// case where an empty header is needed to append to the result. diff --git a/src/wcontrol/CMakeLists.txt b/src/wcontrol/CMakeLists.txt index 121e649443..a8147b5c1c 100644 --- a/src/wcontrol/CMakeLists.txt +++ b/src/wcontrol/CMakeLists.txt @@ -15,4 +15,5 @@ target_include_directories(wcontrol PRIVATE target_link_libraries(wcontrol PUBLIC log XrdSsiLib + qhttp ) diff --git a/src/wcontrol/Foreman.cc b/src/wcontrol/Foreman.cc index 0f063d901f..1c79e456bb 100644 --- a/src/wcontrol/Foreman.cc +++ b/src/wcontrol/Foreman.cc @@ -26,38 +26,69 @@ // System headers #include +#include + +// Third party headers +#include "boost/filesystem.hpp" // LSST headers #include "lsst/log/Log.h" // Qserv headers #include "mysql/MySqlConfig.h" +#include "qhttp/Request.h" +#include "qhttp/Response.h" +#include "qhttp/Server.h" +#include "qhttp/Status.h" #include "wbase/WorkerCommand.h" +#include "wconfig/WorkerConfig.h" #include "wcontrol/SqlConnMgr.h" #include "wcontrol/WorkerStats.h" #include "wdb/ChunkResource.h" #include "wdb/SQLBackend.h" #include "wpublish/QueriesAndChunks.h" +using namespace std; +namespace fs = boost::filesystem; +namespace qhttp = lsst::qserv::qhttp; + namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.wcontrol.Foreman"); -} -using namespace std; +/// Remove the result file specified in the parameter of the method. +/// @param fileName An absolute path name to a file to be removed. +/// @return The HTTP status code which depends on the status of the requested +/// file and the outcome of the operation. 
+qhttp::Status removeResultFile(std::string const& fileName) { + string const context = "Foreman::" + string(__func__) + " "; + fs::path const filePath(fileName); + if (!fs::exists(filePath)) return qhttp::STATUS_NOT_FOUND; + boost::system::error_code ec; + fs::remove_all(filePath, ec); + if (ec.value() != 0) { + LOGS(_log, LOG_LVL_WARN, + context << "failed to remove the result file: " << fileName << ", code: " << ec.value() + << ", error:" << ec.message()); + return qhttp::STATUS_INTERNAL_SERVER_ERR; + } + LOGS(_log, LOG_LVL_DEBUG, context << "result file removed: " << fileName); + return qhttp::STATUS_OK; +} +} // namespace namespace lsst::qserv::wcontrol { Foreman::Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigned int maxPoolThreads, mysql::MySqlConfig const& mySqlConfig, wpublish::QueriesAndChunks::Ptr const& queries, std::shared_ptr const& sqlConnMgr, - std::shared_ptr const& transmitMgr, - wconfig::WorkerConfig const& workerConfig) + std::shared_ptr const& transmitMgr) : _scheduler(scheduler), _mySqlConfig(mySqlConfig), _queries(queries), _sqlConnMgr(sqlConnMgr), _transmitMgr(transmitMgr), - _workerConfig(workerConfig) { + _io_service(), + _httpServer(qhttp::Server::create(_io_service, 0 /* grab the first available port */)) { // Make the chunk resource mgr // Creating backend makes a connection to the database for making temporary tables. // It will delete temporary tables that it can identify as being created by a worker. @@ -76,6 +107,34 @@ Foreman::Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigne WorkerStats::setup(); // FUTURE: maybe add links to scheduler, _backend, etc? _mark = make_shared(ERR_LOC, "Forman Test Msg"); + + // Read-only access to the result files via the HTTP protocol's method "GET" + // + // NOTE: The following config doesn't seem to work due to multiple instances + // of '/' that's present in a value passed for the pattern parameter + // (the first parameter) of the called method. 
+ // + // _httpServer->addStaticContent(workerConfig->resultsDirname() + "/*", "/"); + // + // Using this insecure config instead. The problem will get fixed later. + auto const workerConfig = wconfig::WorkerConfig::instance(); + _httpServer->addStaticContent("/*", "/"); + _httpServer->addHandler("DELETE", workerConfig->resultsDirname() + "/:file", + [](qhttp::Request::Ptr const req, qhttp::Response::Ptr const resp) { + resp->sendStatus(::removeResultFile(req->path)); + }); + + // The HTTP server should be started before launching the threads to prevent + // the thread from exiting prematurely due to a lack of work. The threads + // will stop automatically when the server will be requested to stop in + // the destructor of the current class. + _httpServer->start(); + assert(workerConfig->resultsNumHttpThreads() > 0); + for (size_t i = 0; i < workerConfig->resultsNumHttpThreads(); ++i) { + std::thread t([this]() { _io_service.run(); }); + t.detach(); + } + LOGS(_log, LOG_LVL_DEBUG, "qhttp started on port=" << _httpServer->getPort()); } Foreman::~Foreman() { @@ -83,6 +142,7 @@ Foreman::~Foreman() { // It will take significant effort to have xrootd shutdown cleanly and this will never get called // until that happens. 
_pool->shutdownPool(); + _httpServer->stop(); } void Foreman::processTasks(vector const& tasks) { @@ -98,6 +158,8 @@ void Foreman::processCommand(shared_ptr const& command) { _workerCommandQueue->queCmd(command); } +uint16_t Foreman::httpPort() const { return _httpServer->getPort(); } + nlohmann::json Foreman::statusToJson(wbase::TaskSelector const& taskSelector) { nlohmann::json status; status["queries"] = _queries->statusToJson(taskSelector); diff --git a/src/wcontrol/Foreman.h b/src/wcontrol/Foreman.h index 8796b425a9..a6ba858047 100644 --- a/src/wcontrol/Foreman.h +++ b/src/wcontrol/Foreman.h @@ -32,6 +32,9 @@ // Third party headers #include "nlohmann/json.hpp" +// Third party headers +#include "boost/asio.hpp" + // Qserv headers #include "util/EventThread.h" #include "util/HoldTrack.h" @@ -49,9 +52,9 @@ namespace lsst::qserv::mysql { class MySqlConfig; } // namespace lsst::qserv::mysql -namespace lsst::qserv::wconfig { -class WorkerConfig; -} // namespace lsst::qserv::wconfig +namespace lsst::qserv::qhttp { +class Server; +} // namespace lsst::qserv::qhttp namespace lsst::qserv::wdb { class ChunkResourceMgr; @@ -95,19 +98,17 @@ class Scheduler : public wbase::TaskScheduler, public util::CommandQueue { class Foreman : public wbase::MsgProcessor { public: /** - * @param scheduler - pointer to the scheduler - * @param poolSize - size of the thread pool - * @param mySqlConfig - configuration object for the MySQL service - * @param queries - query statistics collector - * @param sqlConnMgr - for limiting the number of MySQL connections used for tasks - * @param transmitMgr - for throttling outgoing massages to prevent czars from being overloaded - * @param workerConfig - worker configuration parameters + * @param scheduler - pointer to the scheduler + * @param poolSize - size of the thread pool + * @param mySqlConfig - configuration object for the MySQL service + * @param queries - query statistics collector + * @param sqlConnMgr - for limiting the number of MySQL 
connections used for tasks + * @param transmitMgr - for throttling outgoing massages to prevent czars from being overloaded */ Foreman(Scheduler::Ptr const& scheduler, unsigned int poolSize, unsigned int maxPoolThreads, mysql::MySqlConfig const& mySqlConfig, std::shared_ptr const& queries, std::shared_ptr const& sqlConnMgr, - std::shared_ptr const& transmitMgr, - wconfig::WorkerConfig const& workerConfig); + std::shared_ptr const& transmitMgr); virtual ~Foreman() override; @@ -120,7 +121,7 @@ class Foreman : public wbase::MsgProcessor { mysql::MySqlConfig const& mySqlConfig() const { return _mySqlConfig; } std::shared_ptr const& sqlConnMgr() const { return _sqlConnMgr; } std::shared_ptr const& transmitMgr() const { return _transmitMgr; } - wconfig::WorkerConfig const& workerConfig() const { return _workerConfig; } + uint16_t httpPort() const; /// Process a group of query processing tasks. /// @see MsgProcessor::processTasks() @@ -154,8 +155,11 @@ class Foreman : public wbase::MsgProcessor { /// Used to throttle outgoing massages to prevent czars from being overloaded. std::shared_ptr const _transmitMgr; - /// Worker configuration parameters. 
- wconfig::WorkerConfig const& _workerConfig; + /// BOOST ASIO services needed to run the HTTP server + boost::asio::io_service _io_service; + + /// The HTTP server for serving/managing result files + std::shared_ptr const _httpServer; }; } // namespace lsst::qserv::wcontrol diff --git a/src/xrdsvc/SsiRequest.cc b/src/xrdsvc/SsiRequest.cc index c6fb005657..7dec4df117 100644 --- a/src/xrdsvc/SsiRequest.cc +++ b/src/xrdsvc/SsiRequest.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include // Third-party headers @@ -43,9 +44,11 @@ #include "util/InstanceCount.h" #include "util/HoldTrack.h" #include "util/Timer.h" +#include "wbase/FileChannelShared.h" #include "wbase/SendChannelShared.h" #include "wbase/TaskState.h" #include "wbase/Task.h" +#include "wconfig/WorkerConfig.h" #include "wcontrol/Foreman.h" #include "wpublish/AddChunkGroupCommand.h" #include "wpublish/ChunkListCommand.h" @@ -183,11 +186,24 @@ void SsiRequest::execute(XrdSsiRequest& req) { " czarid:" + std::to_string(taskMsg->has_czarid())); return; } - auto const sendChannelShared = - wbase::SendChannelShared::create(sendChannel, _foreman->transmitMgr(), taskMsg->czarid()); - auto const tasks = - wbase::Task::createTasks(taskMsg, sendChannelShared, _foreman->chunkResourceMgr(), - _foreman->mySqlConfig(), _foreman->sqlConnMgr()); + std::shared_ptr channelShared; + switch (wconfig::WorkerConfig::instance()->resultDeliveryProtocol()) { + case wconfig::WorkerConfig::ResultDeliveryProtocol::SSI: + channelShared = wbase::SendChannelShared::create(sendChannel, _foreman->transmitMgr(), + taskMsg->czarid()); + break; + case wconfig::WorkerConfig::ResultDeliveryProtocol::XROOT: + case wconfig::WorkerConfig::ResultDeliveryProtocol::HTTP: + channelShared = + wbase::FileChannelShared::create(sendChannel, _foreman->transmitMgr(), taskMsg); + break; + default: + throw std::runtime_error("SsiRequest::" + std::string(__func__) + + " unsupported result delivery protocol"); + } + auto const tasks = 
wbase::Task::createTasks(taskMsg, channelShared, _foreman->chunkResourceMgr(), + _foreman->mySqlConfig(), _foreman->sqlConnMgr(), + _foreman->httpPort()); for (auto const& task : tasks) { _tasks.push_back(task); } @@ -242,6 +258,39 @@ void SsiRequest::execute(XrdSsiRequest& req) { "QueryManagement: op=" << proto::QueryManagement_Operation_Name(request.op()) << " query_id=" << request.query_id()); + switch (wconfig::WorkerConfig::instance()->resultDeliveryProtocol()) { + case wconfig::WorkerConfig::ResultDeliveryProtocol::SSI: + // TODO: locate and cancel the coresponding tasks, remove the tasks + // from the scheduler queues. + break; + case wconfig::WorkerConfig::ResultDeliveryProtocol::XROOT: + case wconfig::WorkerConfig::ResultDeliveryProtocol::HTTP: + switch (request.op()) { + case proto::QueryManagement::CANCEL_AFTER_RESTART: + // TODO: locate and cancel the coresponding tasks, remove the tasks + // from the scheduler queues. + wbase::FileChannelShared::cleanUpResultsOnCzarRestart(request.query_id()); + break; + case proto::QueryManagement::CANCEL: + // TODO: locate and cancel the coresponding tasks, remove the tasks + // from the scheduler queues. + wbase::FileChannelShared::cleanUpResults(request.query_id()); + break; + case proto::QueryManagement::COMPLETE: + wbase::FileChannelShared::cleanUpResults(request.query_id()); + break; + default: + reportError("QueryManagement: op=" + + proto::QueryManagement_Operation_Name(request.op()) + + " is not supported by the current implementation."); + return; + } + break; + default: + throw std::runtime_error("SsiRequest::" + std::string(__func__) + + " unsupported result delivery protocol"); + } + // Send back the empty response since no info is expected by a caller // for this type of requests beyond the usual error notifications (if any). 
this->reply((char const*)0, 0); diff --git a/src/xrdsvc/SsiService.cc b/src/xrdsvc/SsiService.cc index d0a9308cd0..2d98a5df9b 100644 --- a/src/xrdsvc/SsiService.cc +++ b/src/xrdsvc/SsiService.cc @@ -47,6 +47,7 @@ #include "util/FileMonitor.h" #include "util/HoldTrack.h" #include "wbase/Base.h" +#include "wbase/FileChannelShared.h" #include "wconfig/WorkerConfig.h" #include "wconfig/WorkerConfigError.h" #include "wcontrol/Foreman.h" @@ -75,7 +76,7 @@ int dummyInitMDC = LOG_MDC_INIT(initMDC); namespace lsst::qserv::xrdsvc { -SsiService::SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerConfig) +SsiService::SsiService(XrdSsiLogger* log) : _mySqlConfig(wconfig::WorkerConfig::instance()->getMySqlConfig()) { LOGS(_log, LOG_LVL_DEBUG, "SsiService starting..."); @@ -169,7 +170,7 @@ SsiService::SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerCon LOGS(_log, LOG_LVL_WARN, "maxPoolThreads=" << maxPoolThreads); _foreman = make_shared(blendSched, poolSize, maxPoolThreads, _mySqlConfig, queries, - sqlConnMgr, transmitMgr, workerConfig); + sqlConnMgr, transmitMgr); // Watch to see if the log configuration is changed. // If LSST_LOG_CONFIG is not defined, there's no good way to know what log @@ -182,6 +183,13 @@ SsiService::SsiService(XrdSsiLogger* log, wconfig::WorkerConfig const& workerCon LOGS(_log, LOG_LVL_ERROR, "logConfigFile=" << logConfigFile); _logFileMonitor = make_shared(logConfigFile); } + + // Garbage collect unclaimed result files (if any). + // ATTENTION: this is the blocking operation since it needs to be run before accepting + // new queries to ensure that worker had sufficient resources to process those. 
+ if (workerConfig->resultsCleanUpOnStart()) { + wbase::FileChannelShared::cleanUpResultsOnWorkerRestart(); + } } SsiService::~SsiService() { LOGS(_log, LOG_LVL_DEBUG, "SsiService dying."); } From 037645004ce16377076f97863fbe734ff0e4b79b Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Mon, 20 Mar 2023 05:13:33 +0000 Subject: [PATCH 6/8] Eliminated dead code in the result merger at Czar It was the trivial refactoring of the code meant to simplify the code and prepare it for the functional extension. --- src/rproc/InfileMerger.cc | 69 --------------------------------------- src/rproc/InfileMerger.h | 4 --- 2 files changed, 73 deletions(-) diff --git a/src/rproc/InfileMerger.cc b/src/rproc/InfileMerger.cc index 8a2bacce8c..1375008696 100644 --- a/src/rproc/InfileMerger.cc +++ b/src/rproc/InfileMerger.cc @@ -635,75 +635,6 @@ bool InfileMerger::_sqlConnect(sql::SqlErrorObject& errObj) { return true; } -size_t InfileMerger::_getResultTableSizeMB() { - std::string tableSizeSql = std::string("SELECT table_name, ") + - "round(((data_length + index_length) / 1048576), 2) as 'MB' " + - "FROM information_schema.TABLES " + "WHERE table_schema = '" + - _config.mySqlConfig.dbName + "' AND table_name = '" + _mergeTable + "'"; - LOGS(_log, LOG_LVL_TRACE, "Checking ResultTableSize " << tableSizeSql); - std::lock_guard m(_sqlMutex); - sql::SqlErrorObject errObj; - sql::SqlResults results; - if (not _sqlConnect(errObj)) { - return 0; - } - if (not _sqlConn->runQuery(tableSizeSql, results, errObj)) { - _error = util::Error(errObj.errNo(), "error getting size sql: " + errObj.printErrMsg(), - util::ErrorCode::MYSQLEXEC); - LOGS(_log, LOG_LVL_ERROR, "result table size error: " << _error.getMsg() << tableSizeSql); - return 0; - } - - // There should only be 1 row - auto iter = results.begin(); - if (iter == results.end()) { - LOGS(_log, LOG_LVL_ERROR, "result table size no rows returned " << _mergeTable); - return 0; - } - auto& row = *iter; - std::string tbName = row[0].first; -
std::string tbSize = row[1].first; - size_t sz = std::stoul(tbSize); - LOGS(_log, LOG_LVL_TRACE, - "Checking ResultTableSize " << tableSizeSql << " ResultTableSizeMB tbl=" << tbName - << " tbSize=" << tbSize); - return sz; -} - -/// Read a ProtoHeader message from a buffer and return the number of bytes -/// consumed. -int InfileMerger::_readHeader(proto::ProtoHeader& header, char const* buffer, int length) { - if (not proto::ProtoImporter::setMsgFrom(header, buffer, length)) { - // This is only a real error if there are no more bytes. - _error = InfileMergerError(util::ErrorCode::HEADER_IMPORT, - _getQueryIdStr() + " Error decoding protobuf header"); - return 0; - } - return length; -} - -/// Read a Result message and return the number of bytes consumed. -int InfileMerger::_readResult(proto::Result& result, char const* buffer, int length) { - if (not proto::ProtoImporter::setMsgFrom(result, buffer, length)) { - _error = InfileMergerError(util::ErrorCode::RESULT_IMPORT, - _getQueryIdStr() + "Error decoding result message"); - throw _error; - } - // result.PrintDebugString(); - return length; -} - -/// Verify that the sessionId is the same as what we were expecting. -/// This is an additional safety check to protect from importing a message from -/// another session. -/// TODO: this is incomplete. -bool InfileMerger::_verifySession(int sessionId) { - if (false) { - _error = InfileMergerError(util::ErrorCode::RESULT_IMPORT, "Session id mismatch"); - } - return true; // TODO: for better message integrity -} - /// Choose the appropriate target name, depending on whether post-processing is /// needed on the result rows. 
void InfileMerger::_fixupTargetName() { diff --git a/src/rproc/InfileMerger.h b/src/rproc/InfileMerger.h index bf4aa90aea..6d093a3cb9 100644 --- a/src/rproc/InfileMerger.h +++ b/src/rproc/InfileMerger.h @@ -240,9 +240,6 @@ class InfileMerger { bool _applyMysqlMyIsam(std::string const& query); bool _applyMysqlInnoDb(std::string const& query); bool _merge(std::shared_ptr& response); - int _readHeader(proto::ProtoHeader& header, char const* buffer, int length); - int _readResult(proto::Result& result, char const* buffer, int length); - bool _verifySession(int sessionId); void _setupRow(); bool _applySql(std::string const& sql); bool _applySqlLocal(std::string const& sql, std::string const& logMsg, sql::SqlResults& results); @@ -274,7 +271,6 @@ class InfileMerger { InfileMergerError _error; ///< Error state bool _isFinished = false; ///< Completed? std::mutex _sqlMutex; ///< Protection for SQL connection - size_t _getResultTableSizeMB(); ///< Return the size of the result table in MB. /** * @brief Put a "jobId" column first in the provided schema. From 4260547df188568d1fb3b2bd37a98dcee6b3820e Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Fri, 10 Mar 2023 07:12:53 +0000 Subject: [PATCH 7/8] Modified Czar to allow pulling result files from workers The new version of the result merger at Czar would dynamically determine which protocol should be used for pulling results from workers: SSI stream, a file read via the XROOTD file protocol, or a file read via the HTTP protocol. In case of the last two options, the merger would also automatically tell a worker to delete the result file upon completion of the merge (including the unsuccessful ones). A choice of the delivery method is based on the optional fields in the Protobuf messages received from workers. It's up to the workers to decide on what method to select.
--- src/ccontrol/CMakeLists.txt | 3 + src/ccontrol/MergingHandler.cc | 287 ++++++++++++++++++++++++++++++++- 2 files changed, 287 insertions(+), 3 deletions(-) diff --git a/src/ccontrol/CMakeLists.txt b/src/ccontrol/CMakeLists.txt index f40e15094b..e037a93592 100644 --- a/src/ccontrol/CMakeLists.txt +++ b/src/ccontrol/CMakeLists.txt @@ -3,6 +3,7 @@ add_dependencies(ccontrol proto) target_include_directories(ccontrol PRIVATE ${ANTLR4_INCLUDE_DIR} + ${XROOTD_INCLUDE_DIRS} ) target_sources(ccontrol PRIVATE @@ -28,8 +29,10 @@ target_link_libraries(ccontrol PUBLIC boost_regex log parser + replica sphgeom xrdreq + XrdCl ) FUNCTION(ccontrol_tests) diff --git a/src/ccontrol/MergingHandler.cc b/src/ccontrol/MergingHandler.cc index 3b30922d63..e9935b7f45 100644 --- a/src/ccontrol/MergingHandler.cc +++ b/src/ccontrol/MergingHandler.cc @@ -25,7 +25,12 @@ #include "ccontrol/MergingHandler.h" // System headers +#include #include +#include + +// Third-party headers +#include "XrdCl/XrdClFile.hh" // LSST headers #include "lsst/log/Log.h" @@ -38,22 +43,269 @@ #include "proto/ProtoImporter.h" #include "proto/WorkerResponse.h" #include "qdisp/JobQuery.h" +#include "replica/HttpClient.h" #include "rproc/InfileMerger.h" #include "util/Bug.h" #include "util/common.h" #include "util/StringHash.h" using lsst::qserv::proto::ProtoHeader; +using lsst::qserv::proto::ProtoHeaderWrap; using lsst::qserv::proto::ProtoImporter; using lsst::qserv::proto::Result; using lsst::qserv::proto::WorkerResponse; +using lsst::qserv::replica::HttpClient; using namespace std; namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.ccontrol.MergingHandler"); + +string xrootdStatus2str(XrdCl::XRootDStatus const& s) { + return "status=" + to_string(s.status) + ", code=" + to_string(s.code) + ", errNo=" + to_string(s.errNo) + + ", message='" + s.GetErrorMessage() + "'"; +} + +/** + * Extract the file path (including both slashes) from the XROOTD-style URL. 
+ * Input: + * @code + * "xroot://://"" + * @code + * Output: + * @code + * "//"" + * @code + */ +string xrootUrl2path(string const& xrootUrl) { + string const delim = "//"; + auto firstPos = xrootUrl.find(delim, 0); + if (string::npos != firstPos) { + // Resume serching at the first character following the delimiter. + auto secondPos = xrootUrl.find(delim, firstPos + 2); + if (string::npos != secondPos) { + return xrootUrl.substr(secondPos); + } + } + throw runtime_error("MergingHandler::" + string(__func__) + " illegal file resource url: " + xrootUrl); +} + +bool readXrootFileResourceAndMerge(lsst::qserv::proto::Result const& result, + function const& messageIsReady) { + string const context = "MergingHandler::" + string(__func__) + " "; + + // Extract data from the input result object before modifying the one. + string const xrootUrl = result.fileresource_xroot(); + + // The algorithm will read the input file to locate result objects containing rows + // and call the provided callback for each such row. + XrdCl::File file; + XrdCl::XRootDStatus status; + status = file.Open(xrootUrl, XrdCl::OpenFlags::Read); + if (!status.IsOK()) { + LOGS(_log, LOG_LVL_ERROR, + context << "failed to open " << xrootUrl << ", " << xrootdStatus2str(status)); + return false; + } + + // Temporary buffer for messages read from the file. The buffer will be (re-)allocated + // as needed to get the largest message. Note that a size of the messages won't exceed + // a limit set in ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT. + unique_ptr buf; + size_t bufSize = 0; + + uint64_t offset = 0; // A location of the next byte to be read from the input file. + bool success = true; + try { + while (true) { + // Read the frame header that carries a size of the subsequent message. 
+ uint32_t msgSizeBytes = 0; + uint32_t bytesRead = 0; + status = file.Read(offset, sizeof(uint32_t), reinterpret_cast(&msgSizeBytes), bytesRead); + if (!status.IsOK()) { + throw runtime_error(context + "failed to read next frame header (" + + to_string(sizeof(uint32_t)) + " bytes) at offset " + to_string(offset) + + " from " + xrootUrl + ", " + xrootdStatus2str(status)); + } + offset += bytesRead; + + if (bytesRead == 0) break; + if (bytesRead != sizeof(uint32_t)) { + throw runtime_error(context + "read " + to_string(bytesRead) + " bytes instead of " + + to_string(sizeof(uint32_t)) + + " bytes when reading next frame header at offset " + + to_string(offset - bytesRead) + " from " + xrootUrl + ", " + + xrootdStatus2str(status)); + } + if (msgSizeBytes == 0) break; + if (msgSizeBytes > ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT) { + throw runtime_error(context + "message size of " + to_string(msgSizeBytes) + + " bytes at the frame header read at offset " + + to_string(offset - bytesRead) + " exceeds the hard limit set to " + + to_string(ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT) + " bytes, from " + + xrootUrl + ", " + xrootdStatus2str(status)); + } + + // (Re-)allocate the buffer if needed. + if (bufSize < msgSizeBytes) { + bufSize = msgSizeBytes; + buf.reset(new char[bufSize]); + } + + // Read the message. 
+ size_t bytes2read = msgSizeBytes; + while (bytes2read != 0) { + uint32_t bytesRead = 0; + status = file.Read(offset, bytes2read, buf.get(), bytesRead); + if (!status.IsOK()) { + throw runtime_error(context + "failed to read " + to_string(bytes2read) + + " bytes at offset " + to_string(offset) + " from " + xrootUrl + ", " + + xrootdStatus2str(status)); + } + if (bytesRead == 0) { + throw runtime_error(context + "read 0 bytes instead of " + to_string(bytes2read) + + " bytes at offset " + to_string(offset) + " from " + xrootUrl + ", " + + xrootdStatus2str(status)); + } + offset += bytesRead; + bytes2read -= bytesRead; + } + success = messageIsReady(buf.get(), msgSizeBytes); + if (!success) break; + } + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_ERROR, ex.what()); + success = false; + } + status = file.Close(); + if (!status.IsOK()) { + LOGS(_log, LOG_LVL_WARN, + context << "failed to close " << xrootUrl << ", " << xrootdStatus2str(status)); + } + + // Remove the file from the worker if it still exists. Report and ignore errors. + // The files will be garbage-collected by workers. + XrdCl::FileSystem fileSystem(xrootUrl); + status = fileSystem.Rm(xrootUrl2path(xrootUrl)); + if (!status.IsOK()) { + LOGS(_log, LOG_LVL_WARN, + context << "failed to remove " << xrootUrl << ", " << xrootdStatus2str(status)); + } + return success; +} + +bool readHttpFileAndMerge(lsst::qserv::proto::Result const& result, + function const& messageIsReady) { + string const context = "MergingHandler::" + string(__func__) + " "; + + // Extract data from the input result object before modifying the one. + string const httpUrl = result.fileresource_http(); + + // A location of the next byte to be read from the input file. The variable + // is used for error reporting. + uint64_t offset = 0; + + // Temporary buffer for messages read from the file. The buffer gets automatically + // resized to fit the largest message. 
+ unique_ptr msgBuf; + size_t msgBufSize = 0; + size_t msgBufNext = 0; // An index of the next character in the buffer. + + // Fixed-size buffer to store the message size. + string msgSizeBuf(sizeof(uint32_t), '\0'); + size_t msgSizeBufNext = 0; // An index of the next character in the buffer. + + // The size of the next/current message. The variable is set after succesfully parsing + // the message length header and is reset back to 0 after parsing the message body. + // The value is stays 0 while reading the frame header. + uint32_t msgSizeBytes = 0; + bool success = true; + try { + HttpClient reader("GET", httpUrl); + reader.read([&](char const* inBuf, size_t inBufSize) { + char const* next = inBuf; + char const* const end = inBuf + inBufSize; + while (next < end) { + if (msgSizeBytes == 0) { + // Continue or finish reading the frame header. + size_t const bytes2read = + std::min(sizeof(uint32_t) - msgSizeBufNext, (size_t)(end - next)); + std::memcpy(msgSizeBuf.data() + msgSizeBufNext, next, bytes2read); + next += bytes2read; + offset += bytes2read; + msgSizeBufNext += bytes2read; + if (msgSizeBufNext == sizeof(uint32_t)) { + // Done reading the frame header. + msgSizeBufNext = 0; + // Parse and evaluate the message length. + msgSizeBytes = *(reinterpret_cast(msgSizeBuf.data())); + if (msgSizeBytes == 0) { + throw runtime_error(context + "message size is 0 at offset " + + to_string(offset - sizeof(uint32_t)) + ", file: " + httpUrl); + } + if (msgSizeBytes > ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT) { + throw runtime_error(context + "message size " + to_string(msgSizeBytes) + + " at offset " + to_string(offset - sizeof(uint32_t)) + + " exceeds the hard limit of " + + to_string(ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT) + + ", file: " + httpUrl); + } + // Extend the message buffer (if needed). Note that buffer never gets + // truncated to avoid excessive memory deallocations/allocations. 
+ if (msgBufSize < msgSizeBytes) { + msgBufSize = msgSizeBytes; + msgBuf.reset(new char[msgBufSize]); + } + } + } else { + // Continue or finish reading the message body. + size_t const bytes2read = + std::min((size_t)msgSizeBytes - msgBufNext, (size_t)(end - next)); + std::memcpy(msgBuf.get() + msgBufNext, next, bytes2read); + next += bytes2read; + offset += bytes2read; + msgBufNext += bytes2read; + if (msgBufNext == msgSizeBytes) { + // Done reading message body. + msgBufNext = 0; + // Parse and evaluate the message. + bool const success = messageIsReady(msgBuf.get(), msgSizeBytes); + if (!success) { + throw runtime_error(context + "message processing failed at offset " + + to_string(offset - msgSizeBytes) + ", file: " + httpUrl); + } + // Reset the variable to prepare for reading the next header & message (if any). + msgSizeBytes = 0; + } + } + } + }); + if (msgSizeBufNext != 0) { + throw runtime_error(context + "short read of the message header at offset " + + to_string(offset - msgSizeBytes) + ", file: " + httpUrl); + } + if (msgBufNext != 0) { + throw runtime_error(context + "short read of the message body at offset " + + to_string(offset - msgSizeBytes) + ", file: " + httpUrl); + } + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_ERROR, ex.what()); + success = false; + } + + // Remove the file from the worker if it still exists. Report and ignore errors. + // The files will be garbage-collected by workers. 
+ try { + HttpClient remover("DELETE", httpUrl); + remover.read([](char const* inBuf, size_t inBufSize) {}); + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, context << "failed to remove " << httpUrl << ", ex: " << ex.what()); + } + return success; } +} // namespace + namespace lsst::qserv::ccontrol { //////////////////////////////////////////////////////////////////////// @@ -148,7 +400,6 @@ bool MergingHandler::flush(int bLen, BufPtr const& bufPtr, bool& last, int& next LOGS(_log, LOG_LVL_WARN, "setResult failure " << _wName); return false; } - resultRows = _response->result.row_size(); LOGS(_log, LOG_LVL_DEBUG, "From:" << _wName << " _mBuf " << util::prettyCharList(*bufPtr, 5)); _state = MsgState::HEADER_WAIT; @@ -156,8 +407,38 @@ bool MergingHandler::flush(int bLen, BufPtr const& bufPtr, bool& last, int& next _jobIds.insert(jobId); LOGS(_log, LOG_LVL_DEBUG, "Flushed last=" << last << " for tableName=" << _tableName); - auto success = _merge(); - _response.reset(new WorkerResponse()); + // Dispatch result processing to the corresponidng method which depends on + // the result delivery protocol configured at the worker. + auto const mergeCurrentResult = [this, &resultRows]() { + resultRows += _response->result.row_size(); + bool const success = _merge(); + // A fresh instance may be needed to process the next message of the results stream. + // Note that _merge() resets the object. 
+ _response.reset(new WorkerResponse()); + return success; + }; + bool success = false; + if (!_response->result.fileresource_xroot().empty()) { + success = ::readXrootFileResourceAndMerge( + _response->result, [&](char const* buf, uint32_t messageLength) -> bool { + if (_response->result.ParseFromArray(buf, messageLength) && + _response->result.IsInitialized()) { + return mergeCurrentResult(); + } + throw runtime_error("MergingHandler::flush ** message deserialization failed **"); + }); + } else if (!_response->result.fileresource_http().empty()) { + success = ::readHttpFileAndMerge( + _response->result, [&](char const* buf, uint32_t messageLength) -> bool { + if (_response->result.ParseFromArray(buf, messageLength) && + _response->result.IsInitialized()) { + return mergeCurrentResult(); + } + throw runtime_error("MergingHandler::flush ** message deserialization failed **"); + }); + } else { + success = mergeCurrentResult(); + } return success; } case MsgState::RESULT_RECV: From 77ffa3e84512ea74e73739cfc88c22cb8d65681a Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Thu, 22 Jun 2023 19:04:02 -0700 Subject: [PATCH 8/8] Added a monitor for the results filesystem Also, extended the Web Dashboard to display the filesystem status and usage statistics. 
--- src/wbase/FileChannelShared.cc | 39 ++++ src/wbase/FileChannelShared.h | 4 + src/wpublish/GetStatusCommand.cc | 2 + .../css/QservWorkerResultsFilesystem.css | 24 +++ src/www/qserv/js/QservMonitoringDashboard.js | 3 + .../qserv/js/QservWorkerResultsFilesystem.js | 193 ++++++++++++++++++ 6 files changed, 265 insertions(+) create mode 100644 src/www/qserv/css/QservWorkerResultsFilesystem.css create mode 100644 src/www/qserv/js/QservWorkerResultsFilesystem.js diff --git a/src/wbase/FileChannelShared.cc b/src/wbase/FileChannelShared.cc index c3715362a1..6b3e23ee67 100644 --- a/src/wbase/FileChannelShared.cc +++ b/src/wbase/FileChannelShared.cc @@ -43,6 +43,7 @@ #include "lsst/log/Log.h" using namespace std; +using namespace nlohmann; namespace fs = boost::filesystem; namespace wconfig = lsst::qserv::wconfig; @@ -146,6 +147,44 @@ void FileChannelShared::cleanUpResults(QueryId queryId) { context << "removed " << numFilesRemoved << " result files from " << dirPath << "."); } +json FileChannelShared::statusToJson() { + string const context = "FileChannelShared::" + string(__func__) + " "; + auto const config = wconfig::WorkerConfig::instance(); + string const protocol = wconfig::WorkerConfig::protocol2str(config->resultDeliveryProtocol()); + fs::path const dirPath = config->resultsDirname(); + json result = json::object({{"protocol", protocol}, + {"folder", dirPath.string()}, + {"capacity_bytes", -1}, + {"free_bytes", -1}, + {"available_bytes", -1}, + {"num_result_files", -1}, + {"size_result_files_bytes", -1}}); + lock_guard const lock(_resultsDirCleanupMtx); + try { + auto const space = fs::space(dirPath); + result["capacity_bytes"] = space.capacity; + result["free_bytes"] = space.free; + result["available_bytes"] = space.available; + uintmax_t sizeResultFilesBytes = 0; + uintmax_t numResultFiles = 0; + string const ext = ".proto"; + auto itr = fs::directory_iterator(dirPath); + for (auto&& entry : boost::make_iterator_range(itr, {})) { + auto const filePath = 
entry.path(); + if (filePath.has_filename() && filePath.has_extension() && (filePath.extension() == ext)) { + numResultFiles++; + sizeResultFilesBytes += fs::file_size(filePath); + } + } + result["num_result_files"] = numResultFiles; + result["size_result_files_bytes"] = sizeResultFilesBytes; + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, + context << "failed to get folder stats for " << dirPath << ", ex: " << ex.what()); + } + return result; +} + FileChannelShared::Ptr FileChannelShared::create(shared_ptr const& sendChannel, shared_ptr const& transmitMgr, shared_ptr const& taskMsg) { diff --git a/src/wbase/FileChannelShared.h b/src/wbase/FileChannelShared.h index a19acb26e2..8b28950962 100644 --- a/src/wbase/FileChannelShared.h +++ b/src/wbase/FileChannelShared.h @@ -31,6 +31,7 @@ // Third-party headers #include +#include "nlohmann/json.hpp" // Qserv headers #include "global/intTypes.h" @@ -94,6 +95,9 @@ class FileChannelShared : public ChannelShared { */ static void cleanUpResults(QueryId queryId); + /// @return Status and statistics on the results folder (capacity, usage, etc.) + static nlohmann::json statusToJson(); + /// The factory method for the channel class. 
/* Styles for the worker "Results Filesystem" page of the Qserv dashboard. */

/* Labels of the controls shown above the table. */
#fwk-qserv-results-filesystem-controls label {
  font-weight: bold;
}

/* The caption sits above the table, aligned to the right. */
table#fwk-qserv-results-filesystem caption {
  caption-side: top;
  text-align: right;
  padding-top: 0;
}

/* Vertically center the content of all body cells. */
table#fwk-qserv-results-filesystem tbody th,
table#fwk-qserv-results-filesystem tbody td {
  vertical-align: middle;
}

/* Preformatted values inside cells get no extra spacing. */
table#fwk-qserv-results-filesystem pre {
  padding: 0;
  margin: 0;
}

/* Highlight the caption while the "updating" class is set on it. */
table#fwk-qserv-results-filesystem caption.updating {
  background-color: #ffeeba;
}

/* Keep the header cells visible while scrolling.
   NOTE(review): the 80px offset presumably matches the height of a fixed page
   header -- confirm. */
table#fwk-qserv-results-filesystem > thead > tr > th.sticky {
  position: sticky;
  top: 80px;
  z-index: 2;
}
define([
    'webfwk/CSSLoader',
    'webfwk/Fwk',
    'webfwk/FwkApplication',
    'qserv/Common',
    'underscore'],

function(CSSLoader,
         Fwk,
         FwkApplication,
         Common,
         _) {

    CSSLoader.load('qserv/css/QservWorkerResultsFilesystem.css');

    /**
     * Dashboard page showing, for each worker, the state of the folder where
     * query results are stored: delivery protocol, folder path, filesystem
     * capacity and usage, and the number and total size of the result files.
     *
     * A value of -1 in any numeric column means that the value is not available.
     */
    class QservWorkerResultsFilesystem extends FwkApplication {

        constructor(name) {
            super(name);
        }

        /// Called by the framework when the page becomes visible.
        fwk_app_on_show() {
            console.log('show: ' + this.fwk_app_name);
            this.fwk_app_on_update();
        }

        /// Called by the framework when the page gets hidden.
        fwk_app_on_hide() {
            console.log('hide: ' + this.fwk_app_name);
        }

        /**
         * Reload the table if the page is visible and the update interval
         * selected by the user has expired since the previous reload.
         */
        fwk_app_on_update() {
            if (!this.fwk_app_visible) return;
            this._init();
            if (this._prev_update_sec === undefined) this._prev_update_sec = 0;
            const now_sec = Fwk.now().sec;
            if (now_sec - this._prev_update_sec > this._update_interval_sec()) {
                this._prev_update_sec = now_sec;
                this._load();
            }
        }

        /**
         * Build the page (controls and an empty table) on the first call.
         * Subsequent calls do nothing.
         */
        _init() {
            if (this._initialized === undefined) this._initialized = false;
            if (this._initialized) return;
            this._initialized = true;
            // NOTE(review): the markup below was rebuilt from the selectors used by
            // this class and by its stylesheet (ids, 'form-control-selector', 'sticky',
            // caption, tbody). Confirm the labels, the layout classes and the list of
            // the interval options against the original page.
            let html = `
<div class="row" id="fwk-qserv-results-filesystem-controls">
  <div class="col">
    <div class="form-row">
      <div class="form-group col-md-1">
        <label for="update-interval">Update interval:</label>
        <select id="update-interval" class="form-control form-control-selector">
          <option value="5">5 sec</option>
          <option value="10" selected>10 sec</option>
          <option value="20">20 sec</option>
          <option value="30">30 sec</option>
          <option value="60">1 min</option>
        </select>
      </div>
      <div class="form-group col-md-1">
        <label for="reset-controls-form">&nbsp;</label>
        <button id="reset-controls-form" class="btn btn-primary form-control">Reset</button>
      </div>
    </div>
  </div>
</div>
<div class="row">
  <div class="col">
    <table class="table table-sm table-hover table-bordered" id="fwk-qserv-results-filesystem">
      <thead class="thead-light">
        <tr>
          <th class="sticky">worker</th>
          <th class="sticky">protocol</th>
          <th class="sticky">folder</th>
          <th class="sticky" style="text-align:right;">capacity [GB]</th>
          <th class="sticky" style="text-align:right;">free [GB]</th>
          <th class="sticky" style="text-align:right;">available [GB]</th>
          <th class="sticky" style="text-align:right;">used [%]</th>
          <th class="sticky" style="text-align:right;">#files</th>
          <th class="sticky" style="text-align:right;">size [GB]</th>
        </tr>
      </thead>
      <caption class="updating">Loading...</caption>
      <tbody></tbody>
    </table>
  </div>
</div>`;
            let cont = this.fwk_app_container.html(html);
            cont.find(".form-control-selector").change(() => {
                this._load();
            });
            cont.find("button#reset-controls-form").click(() => {
                this._set_update_interval_sec(QservWorkerResultsFilesystem._default_update_interval_sec);
                this._load();
            });
        }

        /**
         * Find a form control within the page and cache the result.
         * @param elem_type  the tag of the element, e.g. 'select'
         * @param id         the identifier of the element
         * @return the (cached) jQuery object for the element
         */
        _form_control(elem_type, id) {
            if (this._form_control_obj === undefined) this._form_control_obj = {};
            if (!_.has(this._form_control_obj, id)) {
                this._form_control_obj[id] = this.fwk_app_container.find(elem_type + '#' + id);
            }
            return this._form_control_obj[id];
        }

        /**
         * @return the selected update interval as a number of seconds. The value
         *   of the selector is a string, hence the explicit conversion. The default
         *   interval is returned if the selector has no usable value.
         */
        _update_interval_sec() {
            const sec = Number(this._form_control('select', 'update-interval').val());
            return Number.isFinite(sec) && sec > 0 ?
                sec : QservWorkerResultsFilesystem._default_update_interval_sec;
        }
        _set_update_interval_sec(val) { this._form_control('select', 'update-interval').val(val); }

        /**
         * Table for displaying info on the results filesystems of the workers.
         */
        _table() {
            if (this._table_obj === undefined) {
                this._table_obj = this.fwk_app_container.find('table#fwk-qserv-results-filesystem');
            }
            return this._table_obj;
        }

        /**
         * Load data from a web service then render it to the application's page.
         * The call is ignored while a previous request is still in progress.
         */
        _load() {
            if (this._loading === undefined) this._loading = false;
            if (this._loading) return;
            this._loading = true;

            this._table().children('caption').addClass('updating');

            Fwk.web_service_GET(
                "/replication/qserv/worker/status",
                {timeout_sec: 2, version: Common.RestAPIVersion},
                (data) => {
                    this._display(data.status);
                    Fwk.setLastUpdate(this._table().children('caption'));
                    this._table().children('caption').removeClass('updating');
                    this._loading = false;
                },
                (msg) => {
                    console.log('request failed', this.fwk_app_name, msg);
                    this._table().children('caption').html('No Response');
                    this._table().children('caption').removeClass('updating');
                    this._loading = false;
                }
            );
        }

        /**
         * Display the status of the results filesystem, one row per worker.
         * Workers that failed to respond, or whose reply carries no filesystem
         * info, get a row with empty cells.
         * @param data  an object mapping the name of a worker to its status
         */
        _display(data) {
            let html = '';
            for (let worker in data) {
                const status = data[worker];
                const filesystem = status.success && status.info ? status.info.filesystem : undefined;
                if (!filesystem) {
                    html += `
<tr>
  <th>${worker}</th>
  <td>&nbsp;</td>
  <td>&nbsp;</td>
  <td>&nbsp;</td>
  <td>&nbsp;</td>
  <td>&nbsp;</td>
  <td>&nbsp;</td>
  <td>&nbsp;</td>
  <td>&nbsp;</td>
</tr>`;
                    continue;
                }
                const used = QservWorkerResultsFilesystem._used_percent(filesystem);
                html += `
<tr>
  <th>${worker}</th>
  <td>${filesystem.protocol}</td>
  <td>${filesystem.folder}</td>
  <td style="text-align:right;"><pre>${QservWorkerResultsFilesystem._bytes2gb(filesystem.capacity_bytes)}</pre></td>
  <td style="text-align:right;"><pre>${QservWorkerResultsFilesystem._bytes2gb(filesystem.free_bytes)}</pre></td>
  <td style="text-align:right;"><pre>${QservWorkerResultsFilesystem._bytes2gb(filesystem.available_bytes)}</pre></td>
  <td style="text-align:right;"><pre>${used}</pre></td>
  <td style="text-align:right;"><pre>${filesystem.num_result_files}</pre></td>
  <td style="text-align:right;"><pre>${QservWorkerResultsFilesystem._bytes2gb(filesystem.size_result_files_bytes)}</pre></td>
</tr>`;
            }
            this._table().children('tbody').html(html);
        }

        static _default_update_interval_sec = 10;
        static _GiB = 1024 * 1024 * 1024;

        /**
         * @param filesystem  the filesystem status reported by a worker
         * @return the used fraction of the filesystem in percent (a string with
         *   one decimal), or -1 if it can not be computed. A full filesystem
         *   (0 bytes available) is reported as 100.0 rather than as "unknown".
         */
        static _used_percent(filesystem) {
            const capacity = filesystem.capacity_bytes;
            const available = filesystem.available_bytes;
            if (!(capacity > 0) || !(available >= 0)) return -1;
            return (100.0 * (capacity - available) / capacity).toFixed(1);
        }

        /**
         * @param bytes  a number of bytes, or a negative number for "unknown"
         * @return the value in GiB (a string with one decimal). Negative input
         *   is returned unchanged, and non-numeric input is reported as -1.
         */
        static _bytes2gb(bytes) {
            if (typeof bytes !== 'number') return -1;
            return bytes < 0 ? bytes : (bytes / QservWorkerResultsFilesystem._GiB).toFixed(1);
        }
    }
    return QservWorkerResultsFilesystem;
});