From 386d0b05036f86fbcf09cf57f5898f1bf5871793 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Thu, 14 Sep 2023 02:20:20 +0000 Subject: [PATCH] Fixed a bug in the result folder cleanup at the worker start up time In the original implementation of the file-based result delivery protocol, workers were attempting to clean up unclaimed files left in workers' result folders regardless of the protocol option. This clearly was a mistake for the SSI protocol option where the folder wasn't required to exist. As a result of this, the application posts confusing warnings in the logging stream. This was fixed. Another problem that was fixed was related to the protocol options HTTP and XROOT where the application wouldn't abort right away if the required folder didn't exist during the folder cleanup attempt. Not having this folder available in this scenario means a configuration error or a problem with the infrastructure. --- src/wbase/FileChannelShared.cc | 12 +++++++++--- src/xrdsvc/SsiService.cc | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/wbase/FileChannelShared.cc b/src/wbase/FileChannelShared.cc index 2254c705d..df0d61713 100644 --- a/src/wbase/FileChannelShared.cc +++ b/src/wbase/FileChannelShared.cc @@ -54,10 +54,15 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wbase.FileChannelShared"); /** * Iterate over the result files at the results folder and remove those * which satisfy the desired criteria. + * @note The folder must exist when this function gets called. Any other + * scenario means a configuration error or a problem with the infrastructure. + * Running into either of these problems should result in the abort of + * the application. * @param context The calling context (used for logging purposes). * @param fileCanBeRemoved The optional validator to be called for each candidate file. * Note that missing validator means "yes" the candidate file can be removed. * @return The total number of removed files. + * @throws std::runtime_error If the results folder doesn't exist. */ size_t cleanUpResultsImpl(string const& context, fs::path const& dirPath, function fileCanBeRemoved = nullptr) { @@ -66,9 +71,10 @@ size_t cleanUpResultsImpl(string const& context, fs::path const& dirPath, boost::system::error_code ec; auto itr = fs::directory_iterator(dirPath, ec); if (ec.value() != 0) { - LOGS(_log, LOG_LVL_WARN, - context << "failed to open the results folder " << dirPath << ", ec: " << ec << "."); - return numFilesRemoved; + string const err = context + "failed to open the results folder '" + dirPath.string() + + "', ec: " + to_string(ec.value()) + "."; + LOGS(_log, LOG_LVL_ERROR, err); + throw runtime_error(err); } for (auto&& entry : boost::make_iterator_range(itr, {})) { auto filePath = entry.path(); diff --git a/src/xrdsvc/SsiService.cc b/src/xrdsvc/SsiService.cc index 2d98a5df9..9debd8a78 100644 --- a/src/xrdsvc/SsiService.cc +++ b/src/xrdsvc/SsiService.cc @@ -188,7 +188,9 @@ SsiService::SsiService(XrdSsiLogger* log) // ATTENTION: this is the blocking operation since it needs to be run before accepting // new queries to ensure that worker had sufficient resources to process those. if (workerConfig->resultsCleanUpOnStart()) { - wbase::FileChannelShared::cleanUpResultsOnWorkerRestart(); + if (workerConfig->resultDeliveryProtocol() != wconfig::WorkerConfig::ResultDeliveryProtocol::SSI) { + wbase::FileChannelShared::cleanUpResultsOnWorkerRestart(); + } } }