diff --git a/src/admin/python/lsst/qserv/admin/itest.py b/src/admin/python/lsst/qserv/admin/itest.py index 9235cfd65f..73794bc253 100644 --- a/src/admin/python/lsst/qserv/admin/itest.py +++ b/src/admin/python/lsst/qserv/admin/itest.py @@ -969,6 +969,7 @@ def compareQueryResults(run_cases: List[str], outputs_dir: str) -> List[ITestCas if not os.path.exists(os.path.join(outputs_dir, case)): _log.warn("There are no query results to compare for %s", case) continue + comparisons = ( (query_mode_mysql, query_mode_qserv_attached), (query_mode_mysql, query_mode_qserv_detached), diff --git a/src/admin/python/lsst/qserv/admin/qservCli/launch.py b/src/admin/python/lsst/qserv/admin/qservCli/launch.py index ebc24cd170..c04babdbdc 100644 --- a/src/admin/python/lsst/qserv/admin/qservCli/launch.py +++ b/src/admin/python/lsst/qserv/admin/qservCli/launch.py @@ -262,7 +262,9 @@ def cmake( build_image, "cmake", "..", + "-DCMAKE_BUILD_TYPE=Debug" ] + # "-DCMAKE_BUILD_TYPE=Debug" if dry: print(" ".join(args)) return diff --git a/src/admin/python/lsst/qserv/admin/replicationInterface.py b/src/admin/python/lsst/qserv/admin/replicationInterface.py index 88f8ebfbc4..322a2a0d84 100644 --- a/src/admin/python/lsst/qserv/admin/replicationInterface.py +++ b/src/admin/python/lsst/qserv/admin/replicationInterface.py @@ -201,6 +201,8 @@ def __init__( self.repl_ctrl = urlparse(repl_ctrl_uri) self.auth_key = auth_key self.admin_auth_key = admin_auth_key + + # Must match MetaModule::version in http/MetaModule.cc self.repl_api_version = 35 _log.debug(f"ReplicationInterface %s", self.repl_ctrl) diff --git a/src/ccontrol/MergingHandler.cc b/src/ccontrol/MergingHandler.cc index 7b78aed245..db79771f44 100644 --- a/src/ccontrol/MergingHandler.cc +++ b/src/ccontrol/MergingHandler.cc @@ -50,6 +50,7 @@ #include "proto/worker.pb.h" #include "qdisp/CzarStats.h" #include "qdisp/JobQuery.h" +#include "qdisp/UberJob.h" #include "rproc/InfileMerger.h" #include "util/Bug.h" #include "util/common.h" @@ -360,7 +361,7 
@@ bool readHttpFileAndMerge(string const& httpUrl, to_string(offset - msgSizeBytes) + ", file: " + httpUrl); } } catch (exception const& ex) { - LOGS(_log, LOG_LVL_ERROR, ex.what()); + LOGS(_log, LOG_LVL_ERROR, string(__func__) + " " + ex.what()); success = false; } @@ -375,6 +376,166 @@ bool readHttpFileAndMerge(string const& httpUrl, return success; } +std::tuple readHttpFileAndMergeHttp( + lsst::qserv::qdisp::UberJob::Ptr const& uberJob, string const& httpUrl, + function const& messageIsReady, + shared_ptr const& httpConnPool) { + string const context = "MergingHandler::" + string(__func__) + " " + " qid=" + uberJob->getIdStr() + " "; + + LOGS(_log, LOG_LVL_DEBUG, context << "httpUrl=" << httpUrl); + + // Track the file while the control flow is staying within the function. + ResultFileTracker const resultFileTracker; + + // The data transmit rate tracker is set up before reading each data message. + unique_ptr> transmitRateTracker; + + // A location of the next byte to be read from the input file. The variable + // is used for error reporting. + uint64_t offset = 0; + + // Temporary buffer for messages read from the file. The buffer gets automatically + // resized to fit the largest message. + unique_ptr msgBuf; + size_t msgBufSize = 0; + size_t msgBufNext = 0; // An index of the next character in the buffer. + + // Fixed-size buffer to store the message size. + string msgSizeBuf(sizeof(uint32_t), '\0'); + size_t msgSizeBufNext = 0; // An index of the next character in the buffer. + + // The size of the next/current message. The variable is set after succesfully parsing + // the message length header and is reset back to 0 after parsing the message body. + // The value is stays 0 while reading the frame header. 
+ uint32_t msgSizeBytes = 0; + bool success = true; + bool mergeSuccess = true; + int headerCount = 0; + uint64_t totalBytesRead = 0; + try { + string const noClientData; + vector const noClientHeaders; + http::ClientConfig clientConfig; + clientConfig.httpVersion = CURL_HTTP_VERSION_1_1; // same as in qhttp + clientConfig.bufferSize = CURL_MAX_READ_SIZE; // 10 MB in the current version of libcurl + clientConfig.tcpKeepAlive = true; + clientConfig.tcpKeepIdle = 5; // the default is 60 sec + clientConfig.tcpKeepIntvl = 5; // the default is 60 sec + http::Client reader(http::Method::GET, httpUrl, noClientData, noClientHeaders, clientConfig, + httpConnPool); + reader.read([&](char const* inBuf, size_t inBufSize) { + // A value of the flag is set by the message processor when it's time to finish + // or abort reading the file. + bool last = false; + char const* next = inBuf; + char const* const end = inBuf + inBufSize; + while ((next < end) && !last) { + LOGS(_log, LOG_LVL_WARN, + context << "TODO:UJ next=" << (uint64_t)next << " end=" << (uint64_t)end + << " last=" << last); + if (msgSizeBytes == 0) { + // Continue or finish reading the frame header. + size_t const bytes2read = + std::min(sizeof(uint32_t) - msgSizeBufNext, (size_t)(end - next)); + std::memcpy(msgSizeBuf.data() + msgSizeBufNext, next, bytes2read); + next += bytes2read; + offset += bytes2read; + msgSizeBufNext += bytes2read; + if (msgSizeBufNext == sizeof(uint32_t)) { + ++headerCount; + // Done reading the frame header. + msgSizeBufNext = 0; + // Parse and evaluate the message length. 
+ msgSizeBytes = *(reinterpret_cast(msgSizeBuf.data())); + if (msgSizeBytes == 0) { + throw runtime_error("message size is 0 at offset " + + to_string(offset - sizeof(uint32_t)) + ", file: " + httpUrl); + } + if (msgSizeBytes > ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT) { + throw runtime_error("message size " + to_string(msgSizeBytes) + " at offset " + + to_string(offset - sizeof(uint32_t)) + + " exceeds the hard limit of " + + to_string(ProtoHeaderWrap::PROTOBUFFER_HARD_LIMIT) + + ", file: " + httpUrl); + } + // Extend the message buffer (if needed). Note that buffer never gets + // truncated to avoid excessive memory deallocations/allocations. + if (msgBufSize < msgSizeBytes) { + msgBufSize = msgSizeBytes; + msgBuf.reset(new char[msgBufSize]); + } + // Starts the tracker to measure the performance of the network I/O. + transmitRateTracker = + make_unique>(reportFileRecvRate); + } + } else { + // Continue or finish reading the message body. + size_t const bytes2read = + std::min((size_t)msgSizeBytes - msgBufNext, (size_t)(end - next)); + std::memcpy(msgBuf.get() + msgBufNext, next, bytes2read); + next += bytes2read; + offset += bytes2read; + msgBufNext += bytes2read; + if (msgBufNext == msgSizeBytes) { + // Done reading message body. + msgBufNext = 0; + + // Destroying the tracker will result in stopping the tracker's timer and + // reporting the file read rate before proceeding to the merge. + if (transmitRateTracker != nullptr) { + transmitRateTracker->addToValue(msgSizeBytes); + transmitRateTracker->setSuccess(); + transmitRateTracker.reset(); + } + + // Parse and evaluate the message. + mergeSuccess = messageIsReady(msgBuf.get(), msgSizeBytes, last); + totalBytesRead += msgSizeBytes; + if (!mergeSuccess) { + success = false; + throw runtime_error("message processing failed at offset " + + to_string(offset - msgSizeBytes) + ", file: " + httpUrl); + } + // Reset the variable to prepare for reading the next header & message (if any). 
+ msgSizeBytes = 0; + } else { + LOGS(_log, LOG_LVL_WARN, + context << " headerCount=" << headerCount + << " incomplete read diff=" << (msgSizeBytes - msgBufNext)); + } + } + } + }); + LOGS(_log, LOG_LVL_DEBUG, + context << " headerCount=" << headerCount << " msgSizeBytes=" << msgSizeBytes + << " totalBytesRead=" << totalBytesRead); + if (msgSizeBufNext != 0) { + throw runtime_error("short read of the message header at offset " + + to_string(offset - msgSizeBytes) + ", file: " + httpUrl); + } + if (msgBufNext != 0) { + throw runtime_error("short read of the message body at offset " + + to_string(offset - msgSizeBytes) + ", file: " + httpUrl); + } + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_ERROR, context + " " + ex.what()); + success = false; + } + + // Remove the file from the worker if it still exists. Report and ignore errors. + // The files will be garbage-collected by workers. + try { + http::Client remover(http::Method::DELETE, httpUrl); + remover.read([](char const* inBuf, size_t inBufSize) {}); + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, context << "failed to remove " << httpUrl << ", ex: " << ex.what()); + } + // If the merge failed, that indicates something went wrong in the local database table, + // is likely this user query is doomed and should be cancelled. 
+ LOGS(_log, LOG_LVL_DEBUG, context << " end succes=" << success << " mergeSuccess=" << mergeSuccess); + return {success, mergeSuccess}; +} + } // namespace namespace lsst::qserv::ccontrol { @@ -396,18 +557,20 @@ MergingHandler::MergingHandler(std::shared_ptr merger, std: _initState(); } -MergingHandler::~MergingHandler() { LOGS(_log, LOG_LVL_DEBUG, __func__); } +MergingHandler::~MergingHandler() { LOGS(_log, LOG_LVL_DEBUG, __func__ << " " << _tableName); } bool MergingHandler::flush(proto::ResponseSummary const& responseSummary, uint32_t& resultRows) { _wName = responseSummary.wname(); // This is needed to ensure the job query would be staying alive for the duration // of the operation to prevent inconsistency witin the application. - auto const jobQuery = getJobQuery().lock(); - if (jobQuery == nullptr) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " failed, jobQuery was NULL"); + auto const jobBase = getJobBase().lock(); + if (jobBase == nullptr) { + LOGS(_log, LOG_LVL_ERROR, __func__ << " failed, jobBase was NULL"); return false; } + auto const jobQuery = std::dynamic_pointer_cast(jobBase); + LOGS(_log, LOG_LVL_TRACE, "MergingHandler::" << __func__ << " jobid=" << responseSummary.jobid() << " transmitsize=" << responseSummary.transmitsize() @@ -508,10 +671,79 @@ bool MergingHandler::_merge(proto::ResponseSummary const& responseSummary, return success; } +bool MergingHandler::_mergeHttp(shared_ptr const& uberJob, + proto::ResponseData const& responseData) { + if (_flushed) { + throw util::Bug(ERR_LOC, "already flushed"); + } + bool const success = _infileMerger->mergeHttp(uberJob, responseData); + if (!success) { + LOGS(_log, LOG_LVL_WARN, __func__ << " failed"); + util::Error const& err = _infileMerger->getError(); + _setError(ccontrol::MSG_RESULT_ERROR, err.getMsg()); + } + return success; +} + void MergingHandler::_setError(int code, std::string const& msg) { LOGS(_log, LOG_LVL_DEBUG, "_setErr: code: " << code << ", message: " << msg); std::lock_guard 
lock(_errorMutex); _error = Error(code, msg); } +tuple MergingHandler::flushHttp(string const& fileUrl, uint64_t expectedRows, + uint64_t& resultRows) { + bool success = false; + bool shouldCancel = false; + + // This is needed to ensure the job query would be staying alive for the duration + // of the operation to prevent inconsistency within the application. + auto const jobBase = getJobBase().lock(); + if (jobBase == nullptr) { + LOGS(_log, LOG_LVL_ERROR, __func__ << " failed, jobBase was NULL"); + return {success, shouldCancel}; // both should still be false + } + auto const uberJob = std::dynamic_pointer_cast(jobBase); + + LOGS(_log, LOG_LVL_TRACE, + "MergingHandler::" << __func__ << " uberJob=" << uberJob->getIdStr() << " fileUrl=" << fileUrl); + + // Dispatch result processing to the corresponidng method which depends on + // the result delivery protocol configured at the worker. + // Notify the file reader when all rows have been read by setting 'last = true'. + auto const dataMergerHttp = [&](char const* buf, uint32_t bufSize, bool& last) { + LOGS(_log, LOG_LVL_TRACE, "dataMergerHttp"); + last = true; + proto::ResponseData responseData; + if (responseData.ParseFromArray(buf, bufSize) && responseData.IsInitialized()) { + bool const mergeSuccess = _mergeHttp(uberJob, responseData); + if (mergeSuccess) { + resultRows += responseData.row_size(); + last = resultRows >= expectedRows; + } + return mergeSuccess; + } + throw runtime_error("MergingHandler::flush ** message deserialization failed **"); + }; + + tie(success, shouldCancel) = + ::readHttpFileAndMergeHttp(uberJob, fileUrl, dataMergerHttp, MergingHandler::_getHttpConnPool()); + + if (!success || shouldCancel) { + LOGS(_log, LOG_LVL_WARN, __func__ << " success=" << success << " shouldCancel=" << shouldCancel); + } + + if (success) { + _infileMerger->mergeCompleteFor(uberJob->getJobId()); + } + return {success, shouldCancel}; +} + +void MergingHandler::flushHttpError(int errorCode, std::string const& 
errorMsg, int status) { + if (!_errorSet.exchange(true)) { + _error = util::Error(errorCode, errorMsg, util::ErrorCode::MYSQLEXEC); + _setError(ccontrol::MSG_RESULT_ERROR, _error.getMsg()); + } +} + } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/MergingHandler.h b/src/ccontrol/MergingHandler.h index 97cd564dd6..1152dc9324 100644 --- a/src/ccontrol/MergingHandler.h +++ b/src/ccontrol/MergingHandler.h @@ -44,6 +44,7 @@ class ResponseSummary; namespace lsst::qserv::qdisp { class JobQuery; +class UberJob; } // namespace lsst::qserv::qdisp namespace lsst::qserv::rproc { @@ -74,6 +75,14 @@ class MergingHandler : public qdisp::ResponseHandler { /// @return true if successful (no error) bool flush(proto::ResponseSummary const& responseSummary, uint32_t& resultRows) override; + /// @see ResponseHandler::flushHttp + /// @see MerginHandler::_mergeHttp + std::tuple flushHttp(std::string const& fileUrl, uint64_t expectedRows, + uint64_t& resultRows) override; + + /// @see ResponseHandler::flushHttpError + void flushHttpError(int errorCode, std::string const& errorMsg, int status) override; + /// Signal an unrecoverable error condition. No further calls are expected. void errorFlush(std::string const& msg, int code) override; @@ -101,6 +110,9 @@ class MergingHandler : public qdisp::ResponseHandler { bool _merge(proto::ResponseSummary const& responseSummary, proto::ResponseData const& responseData, std::shared_ptr const& jobQuery); + /// Call InfileMerger to do the work of merging this data to the result. + bool _mergeHttp(std::shared_ptr const& uberJob, proto::ResponseData const& responseData); + /// Set error code and string. void _setError(int code, std::string const& msg); @@ -115,6 +127,7 @@ class MergingHandler : public qdisp::ResponseHandler { std::shared_ptr _infileMerger; ///< Merging delegate std::string _tableName; ///< Target table name Error _error; ///< Error description + std::atomic _errorSet{false}; ///< Set to true when an error is set. 
mutable std::mutex _errorMutex; ///< Protect readers from partial updates bool _flushed{false}; ///< flushed to InfileMerger? std::string _wName{"~"}; ///< worker name diff --git a/src/ccontrol/UserQuery.h b/src/ccontrol/UserQuery.h index 1989916884..a63efa2bd8 100644 --- a/src/ccontrol/UserQuery.h +++ b/src/ccontrol/UserQuery.h @@ -42,9 +42,9 @@ #include "qmeta/types.h" // Forward decl -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { class MessageStore; -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta namespace lsst::qserv::ccontrol { @@ -74,7 +74,7 @@ class UserQuery { virtual void discard() = 0; // Delegate objects - virtual std::shared_ptr getMessageStore() = 0; + virtual std::shared_ptr getMessageStore() = 0; /// This method should disappear when we start supporting results /// in locations other than MySQL tables. We'll switch to getResultLocation() diff --git a/src/ccontrol/UserQueryAsyncResult.cc b/src/ccontrol/UserQueryAsyncResult.cc index 9f11c46e50..a3edbbcc2c 100644 --- a/src/ccontrol/UserQueryAsyncResult.cc +++ b/src/ccontrol/UserQueryAsyncResult.cc @@ -32,9 +32,9 @@ // Qserv headers #include "qmeta/Exceptions.h" +#include "qmeta/JobStatus.h" #include "qmeta/QMeta.h" -#include "qdisp/JobStatus.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "sql/SqlConnection.h" #include "sql/SqlResults.h" @@ -53,7 +53,7 @@ UserQueryAsyncResult::UserQueryAsyncResult(QueryId queryId, qmeta::CzarId qMetaC _qMetaCzarId(qMetaCzarId), _qMeta(qMeta), _resultDbConn(resultDbConn), - _messageStore(std::make_shared()) { + _messageStore(std::make_shared()) { LOGS(_log, LOG_LVL_DEBUG, "UserQueryAsyncResult: QID=" << queryId); // get query info from QMeta @@ -149,8 +149,8 @@ void UserQueryAsyncResult::submit() { std::string sevStr = row[3].first; int64_t timestampMilli = boost::lexical_cast(row[4].first); MessageSeverity sev = sevStr == "INFO" ? 
MSG_INFO : MSG_ERROR; - qdisp::JobStatus::Clock::duration duration = std::chrono::milliseconds(timestampMilli); - qdisp::JobStatus::TimeType timestamp(duration); + qmeta::JobStatus::Clock::duration duration = std::chrono::milliseconds(timestampMilli); + qmeta::JobStatus::TimeType timestamp(duration); _messageStore->addMessage(chunkId, "DUPLICATE", code, message, sev, timestamp); } catch (std::exception const& exc) { LOGS(_log, LOG_LVL_ERROR, "Error reading message table data: " << exc.what()); @@ -184,7 +184,7 @@ void UserQueryAsyncResult::kill() {} void UserQueryAsyncResult::discard() {} -std::shared_ptr UserQueryAsyncResult::getMessageStore() { return _messageStore; } +std::shared_ptr UserQueryAsyncResult::getMessageStore() { return _messageStore; } std::string UserQueryAsyncResult::getResultTableName() const { if (_qInfo.resultLocation().compare(0, 6, "table:") == 0) { diff --git a/src/ccontrol/UserQueryAsyncResult.h b/src/ccontrol/UserQueryAsyncResult.h index 9b2340211d..d65e8a76da 100644 --- a/src/ccontrol/UserQueryAsyncResult.h +++ b/src/ccontrol/UserQueryAsyncResult.h @@ -31,7 +31,7 @@ #include "qmeta/QInfo.h" #include "qmeta/types.h" -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { class MessageStore; } @@ -90,7 +90,7 @@ class UserQueryAsyncResult : public UserQuery { void discard() override; // Delegate objects - std::shared_ptr getMessageStore() override; + std::shared_ptr getMessageStore() override; /// This method should disappear when we start supporting results /// in locations other than MySQL tables. 
We'll switch to getResultLocation() @@ -113,7 +113,7 @@ class UserQueryAsyncResult : public UserQuery { std::shared_ptr _qMeta; sql::SqlConnection* _resultDbConn; qmeta::QInfo _qInfo; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; QueryState _qState = UNKNOWN; }; diff --git a/src/ccontrol/UserQueryDrop.cc b/src/ccontrol/UserQueryDrop.cc index 5e291800ff..842f68c250 100644 --- a/src/ccontrol/UserQueryDrop.cc +++ b/src/ccontrol/UserQueryDrop.cc @@ -33,8 +33,8 @@ // Qserv headers #include "css/CssAccess.h" #include "css/CssError.h" -#include "qdisp/MessageStore.h" #include "qmeta/Exceptions.h" +#include "qmeta/MessageStore.h" #include "qmeta/QMeta.h" #include "sql/SqlConnection.h" #include "sql/SqlErrorObject.h" @@ -57,7 +57,7 @@ UserQueryDrop::UserQueryDrop(std::shared_ptr const& css, std::st _queryMetadata(queryMetadata), _qMetaCzarId(qMetaCzarId), _qState(UNKNOWN), - _messageStore(std::make_shared()), + _messageStore(std::make_shared()), _sessionId(0) {} std::string UserQueryDrop::getError() const { return std::string(); } diff --git a/src/ccontrol/UserQueryDrop.h b/src/ccontrol/UserQueryDrop.h index 73cc6b6b61..450f06fab8 100644 --- a/src/ccontrol/UserQueryDrop.h +++ b/src/ccontrol/UserQueryDrop.h @@ -88,7 +88,7 @@ class UserQueryDrop : public UserQuery { virtual void discard() override; // Delegate objects - virtual std::shared_ptr getMessageStore() override { return _messageStore; } + virtual std::shared_ptr getMessageStore() override { return _messageStore; } private: /// Check the status of item to be dropped @@ -101,7 +101,7 @@ class UserQueryDrop : public UserQuery { std::shared_ptr _queryMetadata; qmeta::CzarId const _qMetaCzarId; ///< Czar ID in QMeta database QueryState _qState; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; int _sessionId; ///< External reference number }; diff --git a/src/ccontrol/UserQueryFactory.cc b/src/ccontrol/UserQueryFactory.cc index 44aeda0c1f..85ba8a7dcd 100644 --- 
a/src/ccontrol/UserQueryFactory.cc +++ b/src/ccontrol/UserQueryFactory.cc @@ -56,7 +56,7 @@ #include "mysql/MySqlConfig.h" #include "parser/ParseException.h" #include "qdisp/Executive.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "qmeta/QMetaMysql.h" #include "qmeta/QMetaSelect.h" #include "qmeta/QStatusMysql.h" @@ -225,8 +225,6 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st // First check for SUBMIT and strip it std::string query = aQuery; - // TODO: DM-43386 need to have WorkerChunkMap info at this point - std::string stripped; bool async = false; if (UserQueryType::isSubmit(query, stripped)) { @@ -305,7 +303,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st sessionValid = false; } - auto messageStore = std::make_shared(); + auto messageStore = std::make_shared(); std::shared_ptr executive; std::shared_ptr infileMergerConfig; if (sessionValid) { @@ -328,6 +326,7 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st uq->qMetaRegister(resultLocation, msgTableName); uq->setupMerger(); uq->saveResultQuery(); + executive->setUserQuerySelect(uq); } return uq; } else if (UserQueryType::isSelectResult(query, userJobId)) { diff --git a/src/ccontrol/UserQueryFlushChunksCache.cc b/src/ccontrol/UserQueryFlushChunksCache.cc index 2c808c139f..47037d127a 100644 --- a/src/ccontrol/UserQueryFlushChunksCache.cc +++ b/src/ccontrol/UserQueryFlushChunksCache.cc @@ -31,7 +31,7 @@ // Qserv headers #include "css/CssAccess.h" #include "css/EmptyChunks.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "sql/SqlConnection.h" #include "sql/SqlErrorObject.h" @@ -49,7 +49,7 @@ UserQueryFlushChunksCache::UserQueryFlushChunksCache(std::shared_ptr()) {} + _messageStore(std::make_shared()) {} std::string UserQueryFlushChunksCache::getError() const { return std::string(); } diff --git a/src/ccontrol/UserQueryFlushChunksCache.h 
b/src/ccontrol/UserQueryFlushChunksCache.h index 74054aaaa3..fe4e913c0b 100644 --- a/src/ccontrol/UserQueryFlushChunksCache.h +++ b/src/ccontrol/UserQueryFlushChunksCache.h @@ -82,7 +82,7 @@ class UserQueryFlushChunksCache : public UserQuery { virtual void discard() override; // Delegate objects - virtual std::shared_ptr getMessageStore() override { return _messageStore; } + virtual std::shared_ptr getMessageStore() override { return _messageStore; } protected: private: @@ -90,7 +90,7 @@ class UserQueryFlushChunksCache : public UserQuery { std::string const _dbName; sql::SqlConnection* _resultDbConn; QueryState _qState; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; }; } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/UserQueryInvalid.h b/src/ccontrol/UserQueryInvalid.h index 3296bf47f7..1ab69bb943 100644 --- a/src/ccontrol/UserQueryInvalid.h +++ b/src/ccontrol/UserQueryInvalid.h @@ -32,7 +32,7 @@ // Qserv headers #include "ccontrol/UserQuery.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "qmeta/types.h" // Forward decl @@ -44,7 +44,7 @@ namespace lsst::qserv::ccontrol { class UserQueryInvalid : public UserQuery { public: UserQueryInvalid(std::string const& message) - : _message(message), _messageStore(std::make_shared()) {} + : _message(message), _messageStore(std::make_shared()) {} UserQueryInvalid(UserQueryInvalid const&) = delete; UserQueryInvalid& operator=(UserQueryInvalid const&) = delete; @@ -69,11 +69,11 @@ class UserQueryInvalid : public UserQuery { virtual void discard() override {} // Delegate objects - virtual std::shared_ptr getMessageStore() override { return _messageStore; } + virtual std::shared_ptr getMessageStore() override { return _messageStore; } private: std::string const _message; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; }; } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/UserQueryProcessList.cc b/src/ccontrol/UserQueryProcessList.cc index 
85a626ecaf..686c8d0bd4 100644 --- a/src/ccontrol/UserQueryProcessList.cc +++ b/src/ccontrol/UserQueryProcessList.cc @@ -35,7 +35,7 @@ // Qserv headers #include "css/CssAccess.h" #include "css/CssError.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "qmeta/Exceptions.h" #include "qmeta/QMetaSelect.h" #include "query/FromList.h" @@ -68,7 +68,7 @@ UserQueryProcessList::UserQueryProcessList(std::shared_ptr co : _resultDbConn(resultDbConn), _qMetaSelect(qMetaSelect), _qMetaCzarId(qMetaCzarId), - _messageStore(std::make_shared()), + _messageStore(std::make_shared()), _resultTableName(::g_nextResultTableId(userQueryId)), _resultDb(resultDb) { // The SQL statement should be mostly OK alredy but we need to change @@ -104,7 +104,7 @@ UserQueryProcessList::UserQueryProcessList(bool full, sql::SqlConnection* result : _resultDbConn(resultDbConn), _qMetaSelect(qMetaSelect), _qMetaCzarId(qMetaCzarId), - _messageStore(std::make_shared()), + _messageStore(std::make_shared()), _resultTableName(::g_nextResultTableId(userQueryId)), _resultDb(resultDb) { // use ShowProcessList view with completion statistics. 
diff --git a/src/ccontrol/UserQueryProcessList.h b/src/ccontrol/UserQueryProcessList.h index 260d3a0adc..8dd421d401 100644 --- a/src/ccontrol/UserQueryProcessList.h +++ b/src/ccontrol/UserQueryProcessList.h @@ -103,7 +103,7 @@ class UserQueryProcessList : public UserQuery { void discard() override; // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// @return Name of the result table for this query, can be empty std::string getResultTableName() const override { return _resultTableName; } @@ -122,7 +122,7 @@ class UserQueryProcessList : public UserQuery { std::shared_ptr _qMetaSelect; qmeta::CzarId const _qMetaCzarId; ///< Czar ID in QMeta database QueryState _qState = UNKNOWN; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; std::string _resultTableName; std::string _query; ///< query to execute on QMeta database std::string _orderBy; diff --git a/src/ccontrol/UserQueryQservManager.cc b/src/ccontrol/UserQueryQservManager.cc index 7d8065d63b..5f132c5802 100644 --- a/src/ccontrol/UserQueryQservManager.cc +++ b/src/ccontrol/UserQueryQservManager.cc @@ -36,7 +36,7 @@ // Qserv headers #include "cconfig/CzarConfig.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "sql/SqlBulkInsert.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" @@ -56,7 +56,7 @@ UserQueryQservManager::UserQueryQservManager(shared_ptr cons string const& value) : _value(value), _resultTableName("qserv_manager_" + queryResources->userQueryId), - _messageStore(make_shared()), + _messageStore(make_shared()), _resultDb(queryResources->resultDb) {} void UserQueryQservManager::submit() { diff --git a/src/ccontrol/UserQueryQservManager.h b/src/ccontrol/UserQueryQservManager.h index fee3e56248..0c73a9f659 100644 --- a/src/ccontrol/UserQueryQservManager.h +++ b/src/ccontrol/UserQueryQservManager.h @@ -38,9 +38,9 @@ #include 
"ccontrol/QueryState.h" #include "global/intTypes.h" -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { class MessageStore; -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta namespace lsst::qserv::ccontrol { @@ -74,7 +74,7 @@ class UserQueryQservManager : public UserQuery { void discard() override {} // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } std::string getResultLocation() const override { return "table:" + _resultTableName; } @@ -84,7 +84,7 @@ class UserQueryQservManager : public UserQuery { private: std::string const _value; std::string _resultTableName; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; QueryState _qState{UNKNOWN}; std::string _resultDb; }; diff --git a/src/ccontrol/UserQuerySelect.cc b/src/ccontrol/UserQuerySelect.cc index 91a7b21ae9..f96a293cc0 100644 --- a/src/ccontrol/UserQuerySelect.cc +++ b/src/ccontrol/UserQuerySelect.cc @@ -80,12 +80,16 @@ #include "ccontrol/MergingHandler.h" #include "ccontrol/TmpTableName.h" #include "ccontrol/UserQueryError.h" +#include "czar/Czar.h" +#include "czar/CzarChunkMap.h" +#include "czar/CzarRegistry.h" #include "global/constants.h" #include "global/LogContext.h" #include "proto/worker.pb.h" #include "proto/ProtoImporter.h" #include "qdisp/Executive.h" -#include "qdisp/MessageStore.h" +#include "qdisp/JobQuery.h" +#include "qmeta/MessageStore.h" #include "qmeta/QMeta.h" #include "qmeta/Exceptions.h" #include "qproc/geomAdapter.h" @@ -102,14 +106,18 @@ #include "query/ValueFactor.h" #include "rproc/InfileMerger.h" #include "sql/Schema.h" +#include "util/Bug.h" #include "util/IterableFormatter.h" #include "util/ThreadPriority.h" #include "xrdreq/QueryManagementAction.h" +#include "qdisp/UberJob.h" namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.ccontrol.UserQuerySelect"); } // namespace +using namespace std; + namespace lsst::qserv { /// A 
class that can be used to parameterize a ProtoImporter for @@ -127,7 +135,7 @@ namespace ccontrol { /// Constructor UserQuerySelect::UserQuerySelect(std::shared_ptr const& qs, - std::shared_ptr const& messageStore, + std::shared_ptr const& messageStore, std::shared_ptr const& executive, std::shared_ptr const& dbModels, std::shared_ptr const& infileMergerConfig, @@ -242,10 +250,10 @@ void UserQuerySelect::submit() { assert(_infileMerger); auto taskMsgFactory = std::make_shared(); - TmpTableName ttn(_qMetaQueryId, _qSession->getOriginal()); + _ttn = std::make_shared(_qMetaQueryId, _qSession->getOriginal()); std::vector chunks; std::mutex chunksMtx; - int sequence = 0; + JobId sequence = 0; auto queryTemplates = _qSession->makeQueryTemplates(); @@ -254,14 +262,6 @@ void UserQuerySelect::submit() { : "none produced.")); // Writing query for each chunk, stop if query is cancelled. - // attempt to change priority, requires root - bool increaseThreadPriority = false; // TODO: add to configuration - util::ThreadPriority threadPriority(pthread_self()); - if (increaseThreadPriority) { - threadPriority.storeOriginalValues(); - threadPriority.setPriorityPolicy(10); - } - // Add QStatsTmp table entry try { _queryStatsData->queryStatsTmpRegister(_qMetaQueryId, _qSession->getChunksSize()); @@ -271,44 +271,72 @@ void UserQuerySelect::submit() { _executive->setScanInteractive(_qSession->getScanInteractive()); + string dbName(""); + bool dbNameSet = false; + for (auto i = _qSession->cQueryBegin(), e = _qSession->cQueryEnd(); i != e && !_executive->getCancelled(); ++i) { auto& chunkSpec = *i; - std::function funcBuildJob = [this, sequence, // sequence must be a copy - &chunkSpec, &queryTemplates, &chunks, &chunksMtx, - &ttn, &taskMsgFactory](util::CmdData*) { - QSERV_LOGCONTEXT_QUERY(_qMetaQueryId); - - qproc::ChunkQuerySpec::Ptr cs; - { - std::lock_guard lock(chunksMtx); - bool const fillInChunkIdTag = false; - cs = _qSession->buildChunkQuerySpec(queryTemplates, chunkSpec, 
fillInChunkIdTag); - chunks.push_back(cs->chunkId); + // Make the JobQuery now + QSERV_LOGCONTEXT_QUERY(_qMetaQueryId); + + qproc::ChunkQuerySpec::Ptr cs; + { + std::lock_guard lock(chunksMtx); + cs = _qSession->buildChunkQuerySpec(queryTemplates, chunkSpec); + chunks.push_back(cs->chunkId); + } + std::string chunkResultName = _ttn->make(cs->chunkId); + + // This should only need to be set once as all jobs should have the same database name. + if (cs->db != dbName) { + if (dbNameSet) { + LOGS(_log, LOG_LVL_ERROR, "dbName change from " << dbName << " to " << cs->db); + return; } - std::string chunkResultName = ttn.make(cs->chunkId); - - ResourceUnit ru; - ru.setAsDbChunk(cs->db, cs->chunkId); - qdisp::JobDescription::Ptr jobDesc = qdisp::JobDescription::create( - _qMetaCzarId, _executive->getId(), sequence, ru, - std::make_shared(_infileMerger, chunkResultName), taskMsgFactory, cs, - chunkResultName); - _executive->add(jobDesc); - }; - - auto cmd = std::make_shared(funcBuildJob); - _executive->queueJobStart(cmd); + dbName = cs->db; + dbNameSet = true; + } + + ResourceUnit ru; + ru.setAsDbChunk(cs->db, cs->chunkId); + qdisp::JobDescription::Ptr jobDesc = qdisp::JobDescription::create( + _qMetaCzarId, _executive->getId(), sequence, ru, + std::make_shared(_infileMerger, chunkResultName), taskMsgFactory, cs, + chunkResultName); + auto job = _executive->add(jobDesc); + + if (!uberJobsEnabled) { + // references in captures cause races + auto funcBuildJob = [this, job{move(job)}](util::CmdData*) { + QSERV_LOGCONTEXT_QUERY(_qMetaQueryId); + _executive->runJobQuery(job); + }; + auto cmd = std::make_shared(funcBuildJob); + _executive->queueJobStart(cmd); + } ++sequence; } - // attempt to restore original thread priority, requires root - if (increaseThreadPriority) { - threadPriority.restoreOriginalValues(); + if (dbNameSet) { + _queryDbName = dbName; + } + + /// At this point the executive has a map of all jobs with the chunkIds as the key. 
+ if (uberJobsEnabled) { + // TODO:UJ _maxChunksPerUberJob maybe put in config??? or set on command line?? + // Different queries may benefit from different values + // Such as LIMIT=1 may work best with this at 1, where + // 100 would be better for others. + _maxChunksPerUberJob = 2; + // This is needed to prevent Czar::_monitor from starting things before they are ready. + _executive->setReadyToExecute(); + buildAndSendUberJobs(); } LOGS(_log, LOG_LVL_DEBUG, "total jobs in query=" << sequence); + // TODO:UJ Waiting for all jobs to start may not be needed anymore? _executive->waitForAllJobsToStart(); // we only care about per-chunk info for ASYNC queries @@ -318,6 +346,155 @@ } } +void UserQuerySelect::buildAndSendUberJobs() { + string const funcN("UserQuerySelect::" + string(__func__) + " QID=" + to_string(_qMetaQueryId)); + LOGS(_log, LOG_LVL_DEBUG, funcN << " start"); + + // Ensure `_monitor()` doesn't do anything until everything is ready. + if (!_executive->isReadyToExecute()) { + LOGS(_log, LOG_LVL_INFO, funcN << " executive isn't ready to generate UberJobs."); + return; + } + + // Only one thread should be generating UberJobs for this user query at any given time. + lock_guard fcLock(_buildUberJobMtx); + bool const clearFlag = false; + _executive->setFlagFailedUberJob(clearFlag); + LOGS(_log, LOG_LVL_DEBUG, "UserQuerySelect::" << __func__ << " totalJobs=" << _executive->getTotalJobs()); + + vector uberJobs; + + auto czarPtr = czar::Czar::getCzar(); + auto czFamilyMap = czarPtr->getCzarFamilyMap(); + auto czChunkMap = czFamilyMap->getChunkMap(_queryDbName); + auto czRegistry = czarPtr->getCzarRegistry(); + + if (czChunkMap == nullptr) { + LOGS(_log, LOG_LVL_ERROR, funcN << " no map found for queryDbName=" << _queryDbName); + // Make an empty chunk map so all jobs are flagged as needing to be reassigned. + // There's a chance that a family will be replicated by the registry. 
+ czChunkMap = czar::CzarChunkMap::create(); + } + + auto const [chunkMapPtr, workerChunkMapPtr] = czChunkMap->getMaps(); + // Make a map of all jobs in the executive. + // TODO:UJ Maybe a check should be made that all databases are in the same family? + + qdisp::Executive::ChunkIdJobMapType unassignedChunksInQuery = _executive->unassignedChunksInQuery(); + + // keep cycling through workers until no more chunks to place. + // - create a map of UberJobs key=, val=> + // - for chunkId in `unassignedChunksInQuery` + // - use `chunkMapPtr` to find the shared scan workerId for chunkId + // - if not existing in the map, make a new uberjob + // - if existing uberjob at max jobs, create a new uberjob + // - once all chunks in the query have been put in uberjobs, find contact info + // for each worker + // - add worker to each uberjob. + // - For failures - If a worker cannot be contacted, that's an uberjob failure. + // - uberjob failures (due to communications problems) will result in the uberjob + // being broken up into multiple UberJobs going to different workers. + // - The best way to do this is probably to just kill the UberJob and mark all + // Jobs that were in that UberJob as needing re-assignment, and re-running + // the code here. The trick is going to be figuring out which workers are alive. + // Maybe force a fresh lookup from the replicator Registry when an UberJob fails. + map> workerJobMap; + vector missingChunks; + + // unassignedChunksInQuery needs to be in numerical order so that UberJobs contain chunk numbers in + // numerical order. The workers run shared scans in numerical order of chunk id numbers. + // This keeps the number of partially complete UberJobs running on a worker to a minimum, + // and should minimize the time for the first UberJob on the worker to complete. 
+ for (auto const& [chunkId, jqPtr] : unassignedChunksInQuery) { + auto iter = chunkMapPtr->find(chunkId); + if (iter == chunkMapPtr->end()) { + missingChunks.push_back(chunkId); + bool const increaseAttemptCount = true; + jqPtr->getDescription()->incrAttemptCountScrubResultsJson(_executive, increaseAttemptCount); + // Assign as many jobs as possible. Any chunks not found will be attempted later. + continue; + } + czar::CzarChunkMap::ChunkData::Ptr chunkData = iter->second; + auto targetWorker = chunkData->getPrimaryScanWorker().lock(); + // TODO:UJ maybe if (targetWorker == nullptr || this worker already tried for this chunk) { + if (targetWorker == nullptr) { + LOGS(_log, LOG_LVL_ERROR, funcN << " No primary scan worker for chunk=" << chunkData->dump()); + // Try to assign a different worker to this job + auto workerHasThisChunkMap = chunkData->getWorkerHasThisMapCopy(); + bool found = false; + for (auto wIter = workerHasThisChunkMap.begin(); wIter != workerHasThisChunkMap.end() && !found; + ++wIter) { + auto maybeTarg = wIter->second.lock(); + if (maybeTarg != nullptr) { + targetWorker = maybeTarg; + found = true; + LOGS(_log, LOG_LVL_WARN, + funcN << " Alternate worker found for chunk=" << chunkData->dump()); + } + } + if (!found) { + // If too many workers are down, there will be a chunk that cannot be found. + // Just continuing should leave jobs `unassigned` with their attempt count + // increased. Either the chunk will be found and jobs assigned, or the jobs' + // attempt count will reach max and the query will be cancelled + // TODO:UJ Needs testing/verification + LOGS(_log, LOG_LVL_ERROR, + funcN << " No primary or alternate worker found for chunk=" << chunkData->dump()); + continue; + } + } + // Add this job to the appropriate UberJob, making the UberJob if needed. 
+ string workerId = targetWorker->getWorkerId(); + auto& ujVect = workerJobMap[workerId]; + if (ujVect.empty() || ujVect.back()->getJobCount() >= _maxChunksPerUberJob) { + auto ujId = _uberJobIdSeq++; // keep ujId consistent + string uberResultName = _ttn->make(ujId); + auto respHandler = make_shared(_infileMerger, uberResultName); + auto uJob = qdisp::UberJob::create(_executive, respHandler, _executive->getId(), ujId, + _qMetaCzarId, targetWorker); + ujVect.push_back(uJob); + } + auto& ujVectBack = ujVect.back(); + ujVectBack->addJob(jqPtr); + LOGS(_log, LOG_LVL_DEBUG, + funcN << " ujVectBack{" << ujVectBack->getIdStr() << " jobCnt=" << ujVectBack->getJobCount() + << "}"); + } + + if (!missingChunks.empty()) { + string errStr = funcN + " a worker could not be found for these chunks "; + for (auto const& chk : missingChunks) { + errStr += to_string(chk) + ","; + } + errStr += " they will be retried later."; + LOGS(_log, LOG_LVL_ERROR, errStr); + // There are likely to be unassigned jobs, so set a flag to try to make + // new uber jobs for these jobs. + _executive->setFlagFailedUberJob(true); + } + + // Add worker contact info to UberJobs. + auto const wContactMap = czRegistry->getWorkerContactMap(); + LOGS(_log, LOG_LVL_DEBUG, funcN << " " << _executive->dumpUberJobCounts()); + for (auto const& [wIdKey, ujVect] : workerJobMap) { + auto iter = wContactMap->find(wIdKey); + if (iter == wContactMap->end()) { + // TODO:UJ Not appropriate to throw for this. Need to re-direct all jobs to different workers. + // Also, this really shouldn't happen, but crashing the czar is probably a bad idea, + // so maybe return internal error to the user? 
+ throw util::Bug(ERR_LOC, funcN + " TODO:UJ no contact information for " + wIdKey); + } + auto const& wContactInfo = iter->second; + for (auto const& ujPtr : ujVect) { + ujPtr->setWorkerContactInfo(wContactInfo); + } + _executive->addUberJobs(ujVect); + for (auto const& ujPtr : ujVect) { + _executive->runUberJob(ujPtr); + } + } +} + /// Block until a submit()'ed query completes. /// @return the QueryState indicating success or failure QueryState UserQuerySelect::join() { @@ -396,13 +573,16 @@ void UserQuerySelect::discard() { return; } } + // Make sure resources are released. if (_executive && _executive->getNumInflight() > 0) { throw UserQueryError(getQueryIdString() + " Executive unfinished, cannot discard"); } + _executive.reset(); _messageStore.reset(); _qSession.reset(); + try { _discardMerger(); } catch (UserQueryError const& e) { diff --git a/src/ccontrol/UserQuerySelect.h b/src/ccontrol/UserQuerySelect.h index 70b7d87a89..a01b973cd8 100644 --- a/src/ccontrol/UserQuerySelect.h +++ b/src/ccontrol/UserQuerySelect.h @@ -32,6 +32,7 @@ */ // System headers +#include #include #include #include @@ -50,13 +51,13 @@ // Forward declarations namespace lsst::qserv::qdisp { class Executive; -class MessageStore; class QdispPool; } // namespace lsst::qserv::qdisp namespace lsst::qserv::qmeta { +class MessageStore; class QMeta; -} +} // namespace lsst::qserv::qmeta namespace lsst::qserv::qproc { class DatabaseModels; @@ -80,11 +81,13 @@ class SemaMgr; namespace lsst::qserv::ccontrol { +class TmpTableName; + /// UserQuerySelect : implementation of the UserQuery for regular SELECT statements. 
class UserQuerySelect : public UserQuery { public: UserQuerySelect(std::shared_ptr const& qs, - std::shared_ptr const& messageStore, + std::shared_ptr const& messageStore, std::shared_ptr const& executive, std::shared_ptr const& dbModels, std::shared_ptr const& infileMergerConfig, @@ -124,7 +127,7 @@ class UserQuerySelect : public UserQuery { void discard() override; // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// @return Name of the result table for this query, can be empty std::string getResultTableName() const override { return _resultTable; } @@ -151,6 +154,11 @@ class UserQuerySelect : public UserQuery { /// save the result query in the query metadata void saveResultQuery(); + /// Use the query and jobs information in the executive to construct and run whatever + /// UberJobs are needed. This can be called multiple times by Czar::_monitor + /// to reassign failed jobs or jobs that were never assigned. + void buildAndSendUberJobs(); + private: /// @return ORDER BY part of SELECT statement that gets executed by the proxy std::string _getResultOrderBy() const; @@ -169,7 +177,7 @@ class UserQuerySelect : public UserQuery { // Delegate classes std::shared_ptr _qSession; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; std::shared_ptr _executive; std::shared_ptr _databaseModels; std::shared_ptr _infileMergerConfig; @@ -188,8 +196,21 @@ class UserQuerySelect : public UserQuery { mutable std::string _errorExtra; ///< Additional error information std::string _resultTable; ///< Result table name std::string _resultLoc; ///< Result location - std::string _resultDb; ///< Result database (todo is this the same as resultLoc??) + std::string _resultDb; ///< Result database TODO:UJ same as resultLoc??) bool _async; ///< true for async query + + /// TODO:UJ The maximum number of chunks allowed in an UberJob. 
At the very + /// least, this needs to be set in the configuration. However, it may also + /// be useful to change this based on the nature of each UserQuery. + int _maxChunksPerUberJob = 1; + std::atomic _uberJobIdSeq{1}; ///< Sequence number for UberJobs in this query. + std::shared_ptr _ttn; ///< Temporary table name generator. + + /// Primary database name for the query. + std::string _queryDbName; + + /// Only one thread should run buildAndSendUberJobs() for this query at a time. + std::mutex _buildUberJobMtx; }; } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/UserQuerySelectCountStar.cc b/src/ccontrol/UserQuerySelectCountStar.cc index d796d810ce..d15ea9639d 100644 --- a/src/ccontrol/UserQuerySelectCountStar.cc +++ b/src/ccontrol/UserQuerySelectCountStar.cc @@ -29,7 +29,7 @@ // Qserv headers #include "ccontrol/UserQueryError.h" #include "ccontrol/UserQueryType.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "qmeta/QInfo.h" #include "qmeta/QMetaSelect.h" #include "query/SelectStmt.h" @@ -62,7 +62,7 @@ UserQuerySelectCountStar::UserQuerySelectCountStar(std::string query, : _resultDbConn(resultDbConn), _qMetaSelect(qMetaSelect), _queryMetadata(queryMetadata), - _messageStore(std::make_shared()), + _messageStore(std::make_shared()), _resultTableName(::g_nextResultTableId(userQueryId)), _userQueryId(userQueryId), _rowsTable(rowsTable), diff --git a/src/ccontrol/UserQuerySelectCountStar.h b/src/ccontrol/UserQuerySelectCountStar.h index 46e5448e7e..681150f842 100644 --- a/src/ccontrol/UserQuerySelectCountStar.h +++ b/src/ccontrol/UserQuerySelectCountStar.h @@ -34,12 +34,10 @@ // Forward decl namespace lsst::qserv { -namespace qdisp { -class MessageStore; -} namespace qmeta { +class MessageStore; class QMetaSelect; -} +} // namespace qmeta namespace query { class SelectStmt; } @@ -82,7 +80,7 @@ class UserQuerySelectCountStar : public UserQuery { void discard() override {} // Delegate objects - std::shared_ptr 
getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// This method should disappear when we start supporting results /// in locations other than MySQL tables. We'll switch to getResultLocation() @@ -117,7 +115,7 @@ class UserQuerySelectCountStar : public UserQuery { std::shared_ptr _resultDbConn; std::shared_ptr _qMetaSelect; std::shared_ptr const& _queryMetadata; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; std::string _resultTableName; std::string _userQueryId; std::string _rowsTable; diff --git a/src/ccontrol/UserQuerySet.cc b/src/ccontrol/UserQuerySet.cc index 6007e66a6f..ce6eb55bde 100644 --- a/src/ccontrol/UserQuerySet.cc +++ b/src/ccontrol/UserQuerySet.cc @@ -22,11 +22,11 @@ #include "UserQuerySet.h" // Qserv headers -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" namespace lsst::qserv::ccontrol { UserQuerySet::UserQuerySet(std::string const& varName, std::string const& varValue) - : _varName(varName), _varValue(varValue), _messageStore(std::make_shared()) {} + : _varName(varName), _varValue(varValue), _messageStore(std::make_shared()) {} } // namespace lsst::qserv::ccontrol diff --git a/src/ccontrol/UserQuerySet.h b/src/ccontrol/UserQuerySet.h index 182653a296..6731ea775c 100644 --- a/src/ccontrol/UserQuerySet.h +++ b/src/ccontrol/UserQuerySet.h @@ -68,7 +68,7 @@ class UserQuerySet : public UserQuery { void discard() override {} // Delegate objects - std::shared_ptr getMessageStore() override { return _messageStore; } + std::shared_ptr getMessageStore() override { return _messageStore; } /// This method should disappear when we start supporting results /// in locations other than MySQL tables. 
We'll switch to getResultLocation() @@ -91,7 +91,7 @@ class UserQuerySet : public UserQuery { std::string _varName; std::string _varValue; QueryState _qState{SUCCESS}; - std::shared_ptr _messageStore; + std::shared_ptr _messageStore; }; } // namespace lsst::qserv::ccontrol diff --git a/src/czar/CMakeLists.txt b/src/czar/CMakeLists.txt index 69a3d4e4c1..9fb9f540a3 100644 --- a/src/czar/CMakeLists.txt +++ b/src/czar/CMakeLists.txt @@ -9,6 +9,7 @@ target_sources(czar PRIVATE HttpCzarIngestModule.cc HttpCzarQueryModule.cc HttpCzarSvc.cc + HttpCzarWorkerModule.cc HttpMonitorModule.cc HttpSvc.cc MessageTable.cc diff --git a/src/czar/Czar.cc b/src/czar/Czar.cc index d7e588c346..bc73e2eca5 100644 --- a/src/czar/Czar.cc +++ b/src/czar/Czar.cc @@ -54,6 +54,7 @@ #include "http/Method.h" #include "proto/worker.pb.h" #include "qdisp/CzarStats.h" +#include "qdisp/Executive.h" #include "qdisp/QdispPool.h" #include "qdisp/SharedResources.h" #include "qproc/DatabaseModels.h" @@ -76,93 +77,8 @@ extern XrdSsiProvider* XrdSsiProviderClient; namespace { -string const createAsyncResultTmpl( - "CREATE TABLE IF NOT EXISTS %1% " - "(jobId BIGINT, resultLocation VARCHAR(1024))" - "ENGINE=MEMORY;" - "INSERT INTO %1% (jobId, resultLocation) " - "VALUES (%2%, '%3%')"); - LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.Czar"); -/** - * This function will keep periodically updating Czar's info in the Replication - * System's Registry. - * @param name The unique identifier of the Czar to be registered. - * @param czarConfig A pointer to the Czar configuration service. - * @note The thread will terminate the process if the registraton request to the Registry - * was explicitly denied by the service. This means the application may be misconfigured. - * Transient communication errors when attempting to connect or send requests to - * the Registry will be posted into the log stream and ignored. 
- */ -void registryUpdateLoop(shared_ptr const& czarConfig) { - auto const method = http::Method::POST; - string const url = "http://" + czarConfig->replicationRegistryHost() + ":" + - to_string(czarConfig->replicationRegistryPort()) + "/czar"; - vector const headers = {"Content-Type: application/json"}; - json const request = json::object({{"version", http::MetaModule::version}, - {"instance_id", czarConfig->replicationInstanceId()}, - {"auth_key", czarConfig->replicationAuthKey()}, - {"czar", - {{"name", czarConfig->name()}, - {"id", czarConfig->id()}, - {"management-port", czarConfig->replicationHttpPort()}, - {"management-host-name", util::get_current_host_fqdn()}}}}); - string const requestContext = "Czar: '" + http::method2string(method) + "' request to '" + url + "'"; - LOGS(_log, LOG_LVL_WARN, "&&&czarPost url=" << url); - LOGS(_log, LOG_LVL_WARN, "&&&czarPost request=" << request.dump()); - LOGS(_log, LOG_LVL_WARN, "&&&czarPost headers=" << headers[0]); - http::Client client(method, url, request.dump(), headers); - while (true) { - try { - json const response = client.readAsJson(); - if (0 == response.at("success").get()) { - string const error = response.at("error").get(); - LOGS(_log, LOG_LVL_ERROR, requestContext + " was denied, error: '" + error + "'."); - abort(); - } - } catch (exception const& ex) { - LOGS(_log, LOG_LVL_WARN, requestContext + " failed, ex: " + ex.what()); - } - this_thread::sleep_for(chrono::seconds(max(1U, czarConfig->replicationRegistryHearbeatIvalSec()))); - } -} - -// &&& doc -void registryWorkerInfoLoop(shared_ptr const& czarConfig) { - // Get worker information from the registry - auto const method = http::Method::GET; - string const url = - "http://" + czarConfig->replicationRegistryHost() + ":" + - to_string(czarConfig->replicationRegistryPort()) + "/services?instance_id=" + - czarConfig->replicationInstanceId(); // &&& what is this value supposed to be to get worker info? 
- vector const headers = {"Content-Type: application/json"}; - json request = nlohmann::json(); - string const requestContext = "Czar: '" + http::method2string(method) + "' request to '" + url + "'"; - LOGS(_log, LOG_LVL_WARN, "&&&czarGet url=" << url); - LOGS(_log, LOG_LVL_WARN, "&&&czarGet request=" << request.dump()); - LOGS(_log, LOG_LVL_WARN, "&&&czarGet headers=" << headers[0]); - http::Client client(method, url, request.dump(), headers); - while (true) { - LOGS(_log, LOG_LVL_WARN, "&&&czarGet loop start"); - try { - json const response = client.readAsJson(); - /* &&& - if (0 == response.at("success").get()) { - string const error = response.at("error").get(); - LOGS(_log, LOG_LVL_ERROR, requestContext + " was denied, error: '" + error + "'."); - abort(); - } - */ - LOGS(_log, LOG_LVL_WARN, "&&&czarGet resp=" << response); - } catch (exception const& ex) { - LOGS(_log, LOG_LVL_WARN, requestContext + " failed, ex: " + ex.what()); - LOGS(_log, LOG_LVL_WARN, requestContext + " &&& failed, ex: " + ex.what()); - } - this_thread::sleep_for(chrono::seconds(15)); - } -} - } // anonymous namespace namespace lsst::qserv::czar { @@ -174,6 +90,61 @@ Czar::Ptr Czar::createCzar(string const& configFilePath, string const& czarName) return _czar; } +void Czar::_monitor() { + string const funcN("Czar::_monitor"); + while (_monitorLoop) { + this_thread::sleep_for(_monitorSleepTime); + LOGS(_log, LOG_LVL_DEBUG, funcN << " start0"); + + /// Check database for changes in worker chunk assignments and aliveness + _czarFamilyMap->read(); + + // TODO:UJ DM-45470 If there were changes in `_czarFamilyMap`, + // see if any workers went down. If any did, `_unassign` all + // Jobs in UberJobs for the downed workers. The `_unassigned` + // Jobs should get reassigned in the next section `assignJobsToUberJobs`. + + /// Create new UberJobs (if possible) for all jobs that are + /// unassigned for any reason. 
+ map> execMap; + { + // Make a copy of all valid Executives + lock_guard execMapLock(_executiveMapMtx); + auto iter = _executiveMap.begin(); + while (iter != _executiveMap.end()) { + auto qIdKey = iter->first; + shared_ptr exec = iter->second.lock(); + if (exec == nullptr) { + iter = _executiveMap.erase(iter); + } else { + execMap[qIdKey] = exec; + ++iter; + } + } + } + // Use the copy to create new UberJobs as needed + for (auto&& [qIdKey, execVal] : execMap) { + execVal->assignJobsToUberJobs(); + } + + // TODO:UJ DM-45470 Maybe get missing results from workers. + // This would be files that workers sent messages to the czar to + // collect, but there was a communication problem and the czar didn't get the message + // or didn't collect the file. to retrieve complete files that haven't been + // collected. + // Basically, is there a reasonable way to check that all UberJobs are being handled + // and nothing has fallen through the cracks? + + // TODO:UJ Maybe send a list of cancelled and completed queries to the workers? + // How long should queryId's remain on this list? + // It's probably better to have the executive for a query to send out + // messages to worker that a user query was cancelled. If a worker sends + // the czar about a cancelled user query, or the executive for that + // query cannot be found, the worker should cancel all Tasks associated + // with that queryId. + } +} + // Constructors Czar::Czar(string const& configFilePath, string const& czarName) : _czarName(czarName), @@ -197,11 +168,8 @@ Czar::Czar(string const& configFilePath, string const& czarName) // the name of the Czar gets translated into a numeric identifier. 
_czarConfig->setId(_uqFactory->userQuerySharedResources()->qMetaCzarId); - try { - _czarChunkMap = CzarChunkMap::create(_uqFactory->userQuerySharedResources()->queryMetadata); - } catch (ChunkMapException const& exc) { - LOGS(_log, LOG_LVL_WARN, string(__func__) + " failed to create CzarChunkMap " + exc.what()); - } + // This will block until there is a successful read of the database tables. + _czarFamilyMap = CzarFamilyMap::create(_uqFactory->userQuerySharedResources()->queryMetadata); // Tell workers to cancel any queries that were submitted before this restart of Czar. // Figure out which query (if any) was recorded in Czar database before the restart. @@ -270,6 +238,17 @@ Czar::Czar(string const& configFilePath, string const& czarName) _czarConfig->setReplicationHttpPort(port); _czarRegistry = CzarRegistry::create(_czarConfig); + + // Start the monitor thread + thread monitorThrd(&Czar::_monitor, this); + _monitorThrd = move(monitorThrd); +} + +Czar::~Czar() { + LOGS(_log, LOG_LVL_DEBUG, "Czar::~Czar()"); + _monitorLoop = false; + _monitorThrd.join(); + LOGS(_log, LOG_LVL_DEBUG, "Czar::~Czar() end"); } SubmitResult Czar::submitQuery(string const& query, map const& hints) { @@ -342,6 +321,7 @@ SubmitResult Czar::submitQuery(string const& query, map const& h // spawn background thread to wait until query finishes to unlock, // note that lambda stores copies of uq and msgTable. auto finalizer = [uq, msgTable]() mutable { + string qidstr = to_string(uq->getQueryId()); // Add logging context with query ID QSERV_LOGCONTEXT_QUERY(uq->getQueryId()); LOGS(_log, LOG_LVL_DEBUG, "submitting new query"); @@ -355,6 +335,7 @@ SubmitResult Czar::submitQuery(string const& query, map const& h // will likely hang because table may still be locked. 
LOGS(_log, LOG_LVL_ERROR, "Query finalization failed (client likely hangs): " << exc.what()); } + uq.reset(); }; LOGS(_log, LOG_LVL_DEBUG, "starting finalizer thread for query"); thread finalThread(finalizer); @@ -516,8 +497,15 @@ void Czar::_makeAsyncResult(string const& asyncResultTable, QueryId queryId, str throw exc; } + string const createAsyncResultTmpl( + "CREATE TABLE IF NOT EXISTS %1% " + "(jobId BIGINT, resultLocation VARCHAR(1024))" + "ENGINE=MEMORY;" + "INSERT INTO %1% (jobId, resultLocation) " + "VALUES (%2%, '%3%')"); + string query = - (boost::format(::createAsyncResultTmpl) % asyncResultTable % queryId % resultLocEscaped).str(); + (boost::format(createAsyncResultTmpl) % asyncResultTable % queryId % resultLocEscaped).str(); if (not sqlConn->runQuery(query, sqlErr)) { SqlError exc(ERR_LOC, "Failure creating async result table", sqlErr); @@ -537,7 +525,7 @@ void Czar::removeOldResultTables() { _lastRemovedTimer.start(); _removingOldTables = true; // Run in a separate thread in the off chance this takes a while. 
- thread t([this]() { + thread thrd([this]() { LOGS(_log, LOG_LVL_INFO, "Removing old result database tables."); auto sqlConn = sql::SqlConnectionFactory::make(_czarConfig->getMySqlResultConfig()); string dbName = _czarConfig->getMySqlResultConfig().dbName; @@ -583,8 +571,8 @@ void Czar::removeOldResultTables() { } _removingOldTables = false; }); - t.detach(); - _oldTableRemovalThread = std::move(t); + thrd.detach(); + _oldTableRemovalThread = std::move(thrd); } SubmitResult Czar::getQueryInfo(QueryId queryId) const { @@ -687,4 +675,22 @@ QueryId Czar::_lastQueryIdBeforeRestart() const { return stoull(queryIdStr); } +void Czar::insertExecutive(QueryId qId, std::shared_ptr const& execPtr) { + lock_guard lgMap(_executiveMapMtx); + _executiveMap[qId] = execPtr; +} + +std::shared_ptr Czar::getExecutiveFromMap(QueryId qId) { + lock_guard lgMap(_executiveMapMtx); + auto iter = _executiveMap.find(qId); + if (iter == _executiveMap.end()) { + return nullptr; + } + std::shared_ptr exec = iter->second.lock(); + if (exec == nullptr) { + _executiveMap.erase(iter); + } + return exec; +} + } // namespace lsst::qserv::czar diff --git a/src/czar/Czar.h b/src/czar/Czar.h index 36878c9aa5..9a39eaccee 100644 --- a/src/czar/Czar.h +++ b/src/czar/Czar.h @@ -59,9 +59,13 @@ namespace lsst::qserv::util { class FileMonitor; } // namespace lsst::qserv::util +namespace lsst::qserv::qdisp { +class Executive; +} // namespace lsst::qserv::qdisp + namespace lsst::qserv::czar { -class CzarChunkMap; +class CzarFamilyMap; class CzarRegistry; /// @addtogroup czar @@ -77,6 +81,7 @@ class Czar { Czar(Czar const&) = delete; Czar& operator=(Czar const&) = delete; + ~Czar(); /** * Submit query for execution. 
@@ -128,10 +133,16 @@ class Czar { /// @return The reconstructed info for the query SubmitResult getQueryInfo(QueryId queryId) const; - std::shared_ptr getCzarChunkMap() const { return _czarChunkMap; } + std::shared_ptr getCzarFamilyMap() const { return _czarFamilyMap; } std::shared_ptr getCzarRegistry() const { return _czarRegistry; } + /// Add an Executive to the map of executives. + void insertExecutive(QueryId qId, std::shared_ptr const& execPtr); + + /// Get the executive associated with `qId`, this may be nullptr. + std::shared_ptr getExecutiveFromMap(QueryId qId); + private: /// Private constructor for singleton. Czar(std::string const& configFilePath, std::string const& czarName); @@ -151,6 +162,9 @@ class Czar { /// @return An identifier of the last query that was recorded in the query metadata table QueryId _lastQueryIdBeforeRestart() const; + /// Periodically check for system changes and use those changes to try to finish queries. + void _monitor(); + static Ptr _czar; ///< Pointer to single instance of the Czar. // combines client name (ID) and its thread ID into one unique ID @@ -188,10 +202,21 @@ class Czar { std::shared_ptr _controlHttpSvc; /// Map of which chunks on which workers and shared scan order. - std::shared_ptr _czarChunkMap; + std::shared_ptr _czarFamilyMap; /// Connection to the registry to register the czar and get worker contact information. std::shared_ptr _czarRegistry; + + std::mutex _executiveMapMtx; ///< protects _executiveMap + std::map> + _executiveMap; ///< Map of executives for queries in progress. + + std::thread _monitorThrd; ///< Thread to run the _monitor() + + /// Set to false on system shutdown to stop _monitorThrd. + std::atomic _monitorLoop{true}; + std::chrono::milliseconds _monitorSleepTime{ + 15000}; ///< Wait time between checks. 
TODO:UJ set from config }; } // namespace lsst::qserv::czar diff --git a/src/czar/CzarChunkMap.cc b/src/czar/CzarChunkMap.cc index 164ad9dd3e..166c6414be 100644 --- a/src/czar/CzarChunkMap.cc +++ b/src/czar/CzarChunkMap.cc @@ -30,6 +30,7 @@ #include "lsst/log/Log.h" // Qserv headers +#include "qmeta/QMeta.h" #include "czar/Czar.h" #include "czar/CzarRegistry.h" #include "qmeta/Exceptions.h" @@ -44,178 +45,11 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.CzarChunkMap"); namespace lsst::qserv::czar { -CzarChunkMap::CzarChunkMap(std::shared_ptr const& qmeta) : _qmeta(qmeta) { - try { - auto mapsSet = _read(); - if (!mapsSet) { - throw ChunkMapException(ERR_LOC, "CzarChunkMap maps were not set in contructor"); - } - } catch (qmeta::QMetaError const& qExc) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " CzarChunkMap could not read DB " << qExc.what()); - throw ChunkMapException(ERR_LOC, string(" CzarChunkMap constructor failed read ") + qExc.what()); - } -} - -bool CzarChunkMap::_read() { - LOGS(_log, LOG_LVL_TRACE, "CzarChunkMap::_read() start"); - // If replacing the map, this may take a bit of time, but it's probably - // better to wait for new maps if something changed. - std::lock_guard gLock(_mapMtx); - qmeta::QMeta::ChunkMap qChunkMap = _qmeta->getChunkMap(); - if (_lastUpdateTime >= qChunkMap.updateTime) { - LOGS(_log, LOG_LVL_DEBUG, - __func__ << " CzarChunkMap no need to read " - << util::TimeUtils::timePointToDateTimeString(_lastUpdateTime) - << " db=" << util::TimeUtils::timePointToDateTimeString(qChunkMap.updateTime)); - return false; - } - - // Make the new maps. 
- auto [chunkMapPtr, wcMapPtr] = makeNewMaps(qChunkMap); - - verify(*chunkMapPtr, *wcMapPtr); - - LOGS(_log, LOG_LVL_DEBUG, " chunkMap=" << dumpChunkMap(*chunkMapPtr)); - LOGS(_log, LOG_LVL_DEBUG, " workerChunkMap=" << dumpWorkerChunkMap(*wcMapPtr)); - - _workerChunkMap = wcMapPtr; - _chunkMap = chunkMapPtr; - _lastUpdateTime = qChunkMap.updateTime; - - LOGS(_log, LOG_LVL_TRACE, "CzarChunkMap::_read() end"); - return true; -} - -pair, shared_ptr> CzarChunkMap::makeNewMaps( - qmeta::QMeta::ChunkMap const& qChunkMap) { - // Create new maps. - auto wcMapPtr = make_shared(); - auto chunkMapPtr = make_shared(); - - // Workers -> Databases map - for (auto const& [workerId, dbs] : qChunkMap.workers) { - // Databases -> Tables map - for (auto const& [dbName, tables] : dbs) { - // Tables -> Chunks map - for (auto const& [tableName, chunks] : tables) { - // vector of ChunkInfo - for (qmeta::QMeta::ChunkMap::ChunkInfo const& chunkInfo : chunks) { - try { - int64_t chunkNum = chunkInfo.chunk; - SizeT sz = chunkInfo.size; - LOGS(_log, LOG_LVL_DEBUG, - "workerdId=" << workerId << " db=" << dbName << " table=" << tableName - << " chunk=" << chunkNum << " sz=" << sz); - insertIntoChunkMap(*wcMapPtr, *chunkMapPtr, workerId, dbName, tableName, chunkNum, - sz); - } catch (invalid_argument const& exc) { - throw ChunkMapException( - ERR_LOC, string(__func__) + " invalid_argument workerdId=" + workerId + - " db=" + dbName + " table=" + tableName + - " chunk=" + to_string(chunkInfo.chunk) + " " + exc.what()); - } catch (out_of_range const& exc) { - throw ChunkMapException( - ERR_LOC, string(__func__) + " out_of_range workerdId=" + workerId + - " db=" + dbName + " table=" + tableName + - " chunk=" + to_string(chunkInfo.chunk) + " " + exc.what()); - } - } - } - } - } - - auto chunksSortedBySize = make_shared(); - calcChunkMap(*chunkMapPtr, *chunksSortedBySize); - - // At this point we have - // - wcMapPtr has a map of workerData by worker id with each worker having a map of ChunkData - // 
- chunkMapPtr has a map of all chunkData by chunk id - // - chunksSortedBySize a list of chunks sorted with largest first. - // From here need to assign shared scan chunk priority - // Go through the chunksSortedBySize list and assign each chunk to worker that has it with the smallest - // totalScanSize. - for (auto&& chunkData : *chunksSortedBySize) { - SizeT smallest = std::numeric_limits::max(); - WorkerChunksData::Ptr smallestWkr = nullptr; - for (auto&& [wkrId, wkrDataWeak] : chunkData->_workerHasThisMap) { - auto wkrData = wkrDataWeak.lock(); - if (wkrData == nullptr) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " unexpected null weak ptr for " << wkrId); - continue; // maybe the next one will be ok. - } - LOGS(_log, LOG_LVL_DEBUG, - __func__ << " wkrId=" << wkrData << " tsz=" << wkrData->_sharedScanTotalSize - << " smallest=" << smallest); - if (wkrData->_sharedScanTotalSize < smallest) { - smallestWkr = wkrData; - smallest = smallestWkr->_sharedScanTotalSize; - } - } - if (smallestWkr == nullptr) { - throw ChunkMapException(ERR_LOC, string(__func__) + " no smallesWkr found for chunk=" + - to_string(chunkData->_chunkId)); - } - smallestWkr->_sharedScanChunkMap[chunkData->_chunkId] = chunkData; - smallestWkr->_sharedScanTotalSize += chunkData->_totalBytes; - chunkData->_primaryScanWorker = smallestWkr; - LOGS(_log, LOG_LVL_DEBUG, - " chunk=" << chunkData->_chunkId << " assigned to scan on " << smallestWkr->_workerId); - } +CzarChunkMap::CzarChunkMap() {} - LOGS(_log, LOG_LVL_TRACE, " chunkMap=" << dumpChunkMap(*chunkMapPtr)); - LOGS(_log, LOG_LVL_TRACE, " workerChunkMap=" << dumpWorkerChunkMap(*wcMapPtr)); - return {chunkMapPtr, wcMapPtr}; -} +CzarChunkMap::~CzarChunkMap() { LOGS(_log, LOG_LVL_DEBUG, "CzarChunkMap::~CzarChunkMap()"); } -void CzarChunkMap::insertIntoChunkMap(WorkerChunkMap& wcMap, ChunkMap& chunkMap, string const& workerId, - string const& dbName, string const& tableName, int64_t chunkIdNum, - SizeT sz) { - // Get or make the worker entry - 
WorkerChunksData::Ptr workerChunksData; - auto iterWC = wcMap.find(workerId); - if (iterWC == wcMap.end()) { - workerChunksData = WorkerChunksData::Ptr(new WorkerChunksData(workerId)); - wcMap[workerId] = workerChunksData; - } else { - workerChunksData = iterWC->second; - } - - // Get or make the ChunkData entry in chunkMap - ChunkData::Ptr chunkData; - auto iterChunkData = chunkMap.find(chunkIdNum); - if (iterChunkData == chunkMap.end()) { - chunkData = ChunkData::Ptr(new ChunkData(chunkIdNum)); - chunkMap[chunkIdNum] = chunkData; - } else { - chunkData = iterChunkData->second; - } - - // Set or verify the table information - auto iterDT = chunkData->_dbTableMap.find({dbName, tableName}); - if (iterDT == chunkData->_dbTableMap.end()) { - // doesn't exist so set it up - chunkData->_dbTableMap[{dbName, tableName}] = sz; - } else { - // Verify that it matches other data - auto const& dbTbl = iterDT->first; - auto tblSz = iterDT->second; - auto const& dbN = dbTbl.first; - auto const& tblN = dbTbl.second; - if (dbName != dbN || tblN != tableName || tblSz != sz) { - LOGS(_log, LOG_LVL_ERROR, - __func__ << " data mismatch for " << dbName << "." << tableName << "=" << sz << " vs " << dbN - << "." << tblN << "=" << tblSz); - } - } - - // Link WorkerData the single chunkData instance for the chunkId - workerChunksData->_chunkDataMap[chunkIdNum] = chunkData; - - // Add worker to the list of workers containing the chunk. - chunkData->addToWorkerHasThis(workerChunksData); -} - -void CzarChunkMap::calcChunkMap(ChunkMap& chunkMap, ChunkVector& chunksSortedBySize) { +void CzarChunkMap::calcChunkMap(ChunkMap const& chunkMap, ChunkVector& chunksSortedBySize) { // Calculate total bytes for all chunks. 
for (auto&& [chunkIdNum, chunkData] : chunkMap) { chunkData->_calcTotalBytes(); @@ -236,7 +70,9 @@ void CzarChunkMap::sortChunks(std::vector& chunksSortedBySize) { std::sort(chunksSortedBySize.begin(), chunksSortedBySize.end(), sortBySizeDesc); } -void CzarChunkMap::verify(ChunkMap const& chunkMap, WorkerChunkMap const& wcMap) { +void CzarChunkMap::verify() { + auto&& wcMap = *_workerChunkMap; + auto&& chunkMap = *_chunkMap; // Use a set to prevent duplicate ids caused by replication levels > 1. set allChunkIds; int errorCount = 0; @@ -287,6 +123,9 @@ void CzarChunkMap::verify(ChunkMap const& chunkMap, WorkerChunkMap const& wcMap) } if (errorCount > 0) { + // TODO:UJ There may be an argument to keep the new maps even if there are problems + // with them. For current testing, it's probably best to leave it how it is so that + // it's easier to isolate problems. throw ChunkMapException(ERR_LOC, "verification failed with " + to_string(errorCount) + " errors"); } } @@ -328,6 +167,53 @@ void CzarChunkMap::ChunkData::addToWorkerHasThis(std::shared_ptr_workerId] = worker; } +std::map> +CzarChunkMap::ChunkData::getWorkerHasThisMapCopy() const { + std::map> newMap = _workerHasThisMap; + return newMap; +} + +void CzarChunkMap::organize() { + auto chunksSortedBySize = make_shared(); + + calcChunkMap(*_chunkMap, *chunksSortedBySize); + + // At this point we have + // - _workerChunkMap has a map of workerData by worker id with each worker having a map of ChunkData + // - _chunkMap has a map of all chunkData by chunk id + // - chunksSortedBySize a list of chunks sorted with largest first. + // From here need to assign shared scan chunk priority + // Go through the chunksSortedBySize list and assign each chunk to worker that has it with the smallest + // totalScanSize. 
+    for (auto&& chunkData : *chunksSortedBySize) { + SizeT smallest = std::numeric_limits::max(); + WorkerChunksData::Ptr smallestWkr = nullptr; + for (auto&& [wkrId, wkrDataWeak] : chunkData->_workerHasThisMap) { + auto wkrData = wkrDataWeak.lock(); + if (wkrData == nullptr) { + LOGS(_log, LOG_LVL_ERROR, __func__ << " unexpected null weak ptr for " << wkrId); + continue; // maybe the next one will be okay. + } + LOGS(_log, LOG_LVL_DEBUG, + __func__ << " wkrId=" << wkrData << " tsz=" << wkrData->_sharedScanTotalSize + << " smallest=" << smallest); + if (wkrData->_sharedScanTotalSize < smallest) { + smallestWkr = wkrData; + smallest = smallestWkr->_sharedScanTotalSize; + } + } + if (smallestWkr == nullptr) { + throw ChunkMapException(ERR_LOC, string(__func__) + " no smallesWkr found for chunk=" + + to_string(chunkData->_chunkId)); + } + smallestWkr->_sharedScanChunkMap[chunkData->_chunkId] = chunkData; + smallestWkr->_sharedScanTotalSize += chunkData->_totalBytes; + chunkData->_primaryScanWorker = smallestWkr; + LOGS(_log, LOG_LVL_DEBUG, + " chunk=" << chunkData->_chunkId << " assigned to scan on " << smallestWkr->_workerId); + } +} + string CzarChunkMap::ChunkData::dump() const { stringstream os; auto primaryWorker = _primaryScanWorker.lock(); @@ -360,4 +246,191 @@ string CzarChunkMap::WorkerChunksData::dump() const { return os.str(); } +CzarFamilyMap::Ptr CzarFamilyMap::create(std::shared_ptr const& qmeta) { + // There's nothing the czar can do with user queries until there's been at least + // one successful read of the database family tables, as the czar doesn't know where to find anything.
+ Ptr newPtr = nullptr; + while (newPtr == nullptr) { + try { + newPtr = Ptr(new CzarFamilyMap(qmeta)); + } catch (ChunkMapException const& exc) { + LOGS(_log, LOG_LVL_WARN, "Could not create CzarFamilyMap, sleep and retry " << exc.what()); + } + if (newPtr == nullptr) { + this_thread::sleep_for(10s); + } + } + + return newPtr; +} + +CzarFamilyMap::CzarFamilyMap(std::shared_ptr const& qmeta) : _qmeta(qmeta) { + try { + auto mapsSet = _read(); + if (!mapsSet) { + throw ChunkMapException(ERR_LOC, cName(__func__) + " maps were not set in constructor"); + } + } catch (qmeta::QMetaError const& qExc) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " could not read DB " << qExc.what()); + throw ChunkMapException(ERR_LOC, cName(__func__) + " constructor failed read " + qExc.what()); + } +} + +bool CzarFamilyMap::read() { + bool mapsSet = false; + try { + mapsSet = _read(); + } catch (qmeta::QMetaError const& qExc) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) + " could not read DB " << qExc.what()); + } + return mapsSet; +} + +bool CzarFamilyMap::_read() { + LOGS(_log, LOG_LVL_TRACE, "CzarFamilyMap::_read() start"); + // If replacing the map, this may take a bit of time, but it's probably + // better to wait for new maps if something changed. + std::lock_guard gLock(_familyMapMtx); + qmeta::QMetaChunkMap qChunkMap = _qmeta->getChunkMap(_lastUpdateTime); + if (_lastUpdateTime >= qChunkMap.updateTime) { + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) << " no need to read " + << util::TimeUtils::timePointToDateTimeString(_lastUpdateTime) + << " db=" << util::TimeUtils::timePointToDateTimeString(qChunkMap.updateTime)); + return false; + } + + // Make the new maps. 
+ shared_ptr familyMapPtr = makeNewMaps(qChunkMap); + + verify(familyMapPtr); + + _familyMap = familyMapPtr; + + _lastUpdateTime = qChunkMap.updateTime; + + LOGS(_log, LOG_LVL_TRACE, "CzarChunkMap::_read() end"); + return true; +} + +std::shared_ptr CzarFamilyMap::makeNewMaps( + qmeta::QMetaChunkMap const& qChunkMap) { + // Create new maps. + std::shared_ptr newFamilyMap = make_shared(); + + // Workers -> Databases map + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " workers.sz=" << qChunkMap.workers.size()); + for (auto const& [workerId, dbs] : qChunkMap.workers) { + // Databases -> Tables map + for (auto const& [dbName, tables] : dbs) { + // Tables -> Chunks map + for (auto const& [tableName, chunks] : tables) { + // vector of ChunkInfo + for (qmeta::QMetaChunkMap::ChunkInfo const& chunkInfo : chunks) { + try { + int64_t chunkNum = chunkInfo.chunk; + CzarChunkMap::SizeT sz = chunkInfo.size; + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) << "workerdId=" << workerId << " db=" << dbName << " table=" + << tableName << " chunk=" << chunkNum << " sz=" << sz); + insertIntoMaps(newFamilyMap, workerId, dbName, tableName, chunkNum, sz); + } catch (invalid_argument const& exc) { + throw ChunkMapException( + ERR_LOC, cName(__func__) + " invalid_argument workerdId=" + workerId + + " db=" + dbName + " table=" + tableName + + " chunk=" + to_string(chunkInfo.chunk) + " " + exc.what()); + } catch (out_of_range const& exc) { + throw ChunkMapException( + ERR_LOC, cName(__func__) + " out_of_range workerdId=" + workerId + + " db=" + dbName + " table=" + tableName + + " chunk=" + to_string(chunkInfo.chunk) + " " + exc.what()); + } + } + } + } + } + + // this needs to be done for each CzarChunkMap in the family map. 
+ for (auto&& [familyName, chunkMapPtr] : *newFamilyMap) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " working on " << familyName); + chunkMapPtr->organize(); + } + + return newFamilyMap; +} + +void CzarFamilyMap::insertIntoMaps(std::shared_ptr const& newFamilyMap, string const& workerId, + string const& dbName, string const& tableName, int64_t chunkIdNum, + CzarChunkMap::SizeT sz) { + // Get the CzarChunkMap for this family + auto familyName = getFamilyNameFromDbName(dbName); + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " familyInsrt{w=" << workerId << " fN=" << familyName << " dbN=" << dbName + << " tblN=" << tableName << " chunk=" << chunkIdNum << " sz=" << sz << "}"); + auto& nfMap = *newFamilyMap; + CzarChunkMap::Ptr czarChunkMap; + auto familyIter = nfMap.find(familyName); + if (familyIter == nfMap.end()) { + czarChunkMap = CzarChunkMap::Ptr(new CzarChunkMap()); + nfMap[familyName] = czarChunkMap; + } else { + czarChunkMap = familyIter->second; + } + + auto [chunkMapPtr, wcMapPtr] = czarChunkMap->_getMaps(); + + CzarChunkMap::WorkerChunkMap& wcMap = *wcMapPtr; + CzarChunkMap::ChunkMap& chunkMap = *chunkMapPtr; + + // Get or make the worker entry + CzarChunkMap::WorkerChunksData::Ptr workerChunksData; + auto iterWC = wcMap.find(workerId); + if (iterWC == wcMap.end()) { + workerChunksData = CzarChunkMap::WorkerChunksData::Ptr(new CzarChunkMap::WorkerChunksData(workerId)); + wcMap[workerId] = workerChunksData; + } else { + workerChunksData = iterWC->second; + } + + // Get or make the ChunkData entry in chunkMap + CzarChunkMap::ChunkData::Ptr chunkData; + auto iterChunkData = chunkMap.find(chunkIdNum); + if (iterChunkData == chunkMap.end()) { + chunkData = CzarChunkMap::ChunkData::Ptr(new CzarChunkMap::ChunkData(chunkIdNum)); + chunkMap[chunkIdNum] = chunkData; + } else { + chunkData = iterChunkData->second; + } + + // Set or verify the table information + auto iterDT = chunkData->_dbTableMap.find({dbName, tableName}); + if (iterDT == 
chunkData->_dbTableMap.end()) { + // doesn't exist so set it up + chunkData->_dbTableMap[{dbName, tableName}] = sz; + } else { + // Verify that it matches other data + auto const& dbTbl = iterDT->first; + auto tblSz = iterDT->second; + auto const& dbN = dbTbl.first; + auto const& tblN = dbTbl.second; + if (dbName != dbN || tblN != tableName || tblSz != sz) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " data mismatch for " << dbName << "." << tableName << "=" << sz << " vs " + << dbN << "." << tblN << "=" << tblSz); + } + } + + // Link WorkerData to the single chunkData instance for the chunkId + workerChunksData->_chunkDataMap[chunkIdNum] = chunkData; + + // Add worker to the list of workers containing the chunk. + chunkData->addToWorkerHasThis(workerChunksData); +} + +void CzarFamilyMap::verify(std::shared_ptr const& familyMap) { + for (auto&& [familyName, czarChunkMapPtr] : *familyMap) { + czarChunkMapPtr->verify(); + } +} + } // namespace lsst::qserv::czar diff --git a/src/czar/CzarChunkMap.h b/src/czar/CzarChunkMap.h index f15eda58f8..f0b85a1d31 100644 --- a/src/czar/CzarChunkMap.h +++ b/src/czar/CzarChunkMap.h @@ -26,37 +26,30 @@ // System headers #include #include +#include #include #include -#include #include #include // Qserv headers #include "global/clock_defs.h" -#include "qmeta/QMeta.h" #include "util/Issue.h" +namespace lsst::qserv::qmeta { +class QMeta; +struct QMetaChunkMap; +} // namespace lsst::qserv::qmeta + namespace lsst::qserv::czar { +class CzarFamilyMap; + class ChunkMapException : public util::Issue { public: ChunkMapException(Context const& ctx, std::string const& msg) : util::Issue(ctx, msg) {} }; -/// This class is used to organize worker chunk table information so that it -/// can be used to send jobs to the appropriate worker and inform workers -/// what chunks they can expect to handle in shared scans. -/// The data for the maps is provided by the Replicator and stored in the -/// QMeta database.
-/// When the data is changed, there is a timestamp that is updated, which -/// will cause new maps to be made by this class. -/// -/// The maps generated are constant objects stored with shared pointers. As -/// such, it should be possible for numerous threads to use each map -/// simultaneously provided they have their own pointers to the maps. -/// The pointers to the maps are mutex protected to safely allow map updates. -/// /// The czar is expected to heavily use the /// `getMaps() -> WorkerChunkMap -> getSharedScanChunkMap()` /// to send jobs to workers, as that gets an ordered list of all chunks @@ -78,11 +71,13 @@ class CzarChunkMap { using Ptr = std::shared_ptr; using SizeT = uint64_t; - CzarChunkMap() = delete; CzarChunkMap(CzarChunkMap const&) = delete; CzarChunkMap& operator=(CzarChunkMap const&) = delete; - static Ptr create(std::shared_ptr const& qmeta) { return Ptr(new CzarChunkMap(qmeta)); } + // static Ptr create(std::shared_ptr const& qmeta) { return Ptr(new CzarChunkMap(qmeta)); } + static Ptr create() { return Ptr(new CzarChunkMap()); } + + ~CzarChunkMap(); class WorkerChunksData; @@ -103,9 +98,13 @@ class CzarChunkMap { /// of this chunk. void addToWorkerHasThis(std::shared_ptr const& worker); + /// Return a copy of _workerHasThisMap. + std::map> getWorkerHasThisMapCopy() const; + std::string dump() const; friend CzarChunkMap; + friend CzarFamilyMap; private: int64_t const _chunkId; ///< The Id number for this chunk. @@ -142,6 +141,7 @@ class CzarChunkMap { std::string dump() const; friend CzarChunkMap; + friend CzarFamilyMap; private: std::string const _workerId; @@ -168,31 +168,14 @@ class CzarChunkMap { /// Sort the chunks in `chunksSortedBySize` in descending order by total size in bytes. static void sortChunks(ChunkVector& chunksSortedBySize); - /// Insert the chunk table described into the correct locations in - /// `wcMap` and `chunkMap`. - /// @param `wcMap` - WorkerChunkMap being constructed. 
- /// @param `chunkMap` - ChunkMap being constructed. - /// @param `workerId` - worker id where this table was found. - /// @param `dbName` - database name for the table being inserted. - /// @param `tableName` - table name for the table being inserted. - /// @param `chunkIdNum` - chunk id number for the table being inserted. - /// @param `sz` - size in bytes of the table being inserted. - static void insertIntoChunkMap(WorkerChunkMap& wcMap, ChunkMap& chunkMap, std::string const& workerId, - std::string const& dbName, std::string const& tableName, - int64_t chunkIdNum, SizeT sz); - /// Calculate the total bytes in each chunk and then sort the resulting ChunkVector by chunk size, /// descending. - static void calcChunkMap(ChunkMap& chunkMap, ChunkVector& chunksSortedBySize); - - /// Make new ChunkMap and WorkerChunkMap from the data in `qChunkMap`. - static std::pair, std::shared_ptr> - makeNewMaps(qmeta::QMeta::ChunkMap const& qChunkMap); + static void calcChunkMap(ChunkMap const& chunkMap, ChunkVector& chunksSortedBySize); /// Verify that all chunks belong to at least one worker and that all chunks are represented in shared /// scans. /// @throws ChunkMapException - static void verify(ChunkMap const& chunkMap, WorkerChunkMap const& wcMap); + void verify(); static std::string dumpChunkMap(ChunkMap const& chunkMap); @@ -207,29 +190,133 @@ class CzarChunkMap { return {_chunkMap, _workerChunkMap}; } -private: - /// Try to `_read` values for maps from `qmeta`. - CzarChunkMap(std::shared_ptr const& qmeta); + /// Use the information from the registry to `organize` `_chunkMap` and `_workerChunkMap` + /// into their expected formats. + void organize(); - /// Read the json worker list from the database and update the maps if there's a new - /// version since the `_lastUpdateTime`. - /// @throws `qmeta::QMetaError` - bool _read(); +private: + CzarChunkMap(); - std::shared_ptr _qmeta; ///< Database connection to collect json worker list. 
+ /// Return shared pointers to `_chunkMap` and `_workerChunkMap`, which should be held until + /// finished with the data. + std::pair, std::shared_ptr> + _getMaps() const { + std::lock_guard lck(_mapMtx); + return {_chunkMap, _workerChunkMap}; + } /// Map of all workers and which chunks they contain. - std::shared_ptr _workerChunkMap; + std::shared_ptr _workerChunkMap{new WorkerChunkMap()}; /// Map of all chunks in the system with chunkId number as the key and the values contain /// information about the tables in those chunks and which worker is responsible for /// handling the chunk in a shared scan. - std::shared_ptr _chunkMap; + std::shared_ptr _chunkMap{new ChunkMap()}; + + mutable std::mutex _mapMtx; ///< protects _workerChunkMap, _chunkMap (TODO:UJ may not be needed anymore) + + friend CzarFamilyMap; +}; + +/// This class is used to organize worker chunk table information so that it +/// can be used to send jobs to the appropriate worker and inform workers +/// what chunks they can expect to handle in shared scans, focusing at the +/// family level. +/// The data for the maps is provided by the Replicator and stored in the +/// QMeta database. +/// When the data is changed, there is a timestamp that is updated, which +/// will cause new maps to be made by this class. +/// +/// The maps generated should be treated as constant objects stored with +/// shared pointers. As such, it should be possible for numerous threads +/// to use each map simultaneously provided they have their own pointers +/// to the maps. +/// The pointers to the maps are mutex protected to safely allow map updates. +// +// TODO:UJ move this to its own header file. +// +// TODO:UJ Currently, each family only has one database and they share a name. +// Once a table mapping databases to families is available, it needs to be +// used to map databases to families in this class. 
+class CzarFamilyMap { +public: + using Ptr = std::shared_ptr; + typedef std::map FamilyMapType; + typedef std::map DbNameToFamilyNameType; + + static Ptr create(std::shared_ptr const& qmeta); + + CzarFamilyMap() = delete; + CzarFamilyMap(CzarFamilyMap const&) = delete; + CzarFamilyMap& operator=(CzarFamilyMap const&) = delete; + + ~CzarFamilyMap() = default; + + /// For unit testing only + /// @param dbNameToFamilyName - valid map of db to family name for the unit test. + // TODO:UJ define member instance for `_dbNameToFamilyName` + CzarFamilyMap(std::shared_ptr const& dbNameToFamilyName) {} + + std::string cName(const char* fName) const { + return std::string("CzarFamilyMap::") + ((fName == nullptr) ? "?" : fName); + } + + /// Family names are unknown until a table has been added to the database, so + /// the dbName will be used as the family name until the table exists. + std::string getFamilyNameFromDbName(std::string const& dbName) const { + // TODO:UJ use a member instance of std::shared_ptr + // once info is available in QMeta. + return dbName; + } + + /// Return the chunk map for the database `dbName` + CzarChunkMap::Ptr getChunkMap(std::string const& dbName) const { + auto familyName = getFamilyNameFromDbName(dbName); + return _getChunkMap(familyName); + } + + /// Read the registry information from the database, if not already set. + bool read(); + + /// Make a new FamilyMapType map including ChunkMap and WorkerChunkMap from the data + /// in `qChunkMap`. Each family has its own ChunkMap and WorkerChunkMap. + std::shared_ptr makeNewMaps(qmeta::QMetaChunkMap const& qChunkMap); + + /// Insert the new element described by the parameters into the `newFamilyMap` as appropriate. + void insertIntoMaps(std::shared_ptr const& newFamilyMap, std::string const& workerId, + std::string const& dbName, std::string const& tableName, int64_t chunkIdNum, + CzarChunkMap::SizeT sz); + + /// Verify the `familyMap` does not have errors.
+ static void verify(std::shared_ptr const& familyMap); + +private: + /// Try to `_read` values for maps from `qmeta`. + CzarFamilyMap(std::shared_ptr const& qmeta); + + /// Read the registry information from the database, stopping if + /// it hasn't been updated. + // TODO:UJ add a changed timestamp (similar to the existing updated timestamp) + // to the registry database and only update when changed. + bool _read(); + + /// Return the chunk map for the `familyName` + CzarChunkMap::Ptr _getChunkMap(std::string const& familyName) const { + std::lock_guard familyLock(_familyMapMtx); + auto iter = _familyMap->find(familyName); + if (iter == _familyMap->end()) { + return nullptr; + } + return iter->second; + } + + std::shared_ptr _qmeta; ///< Database connection to collect json worker list. /// The last time the maps were updated with information from the replicator. TIMEPOINT _lastUpdateTime; // initialized to 0; - mutable std::mutex _mapMtx; ///< protects _workerChunkMap, _chunkMap, _timeStamp, and _qmeta. + std::shared_ptr _familyMap{new FamilyMapType()}; + mutable std::mutex _familyMapMtx; ///< protects _familyMap, _timeStamp, and _qmeta. }; } // namespace lsst::qserv::czar diff --git a/src/czar/CzarRegistry.cc b/src/czar/CzarRegistry.cc index 074ba9bba6..f5abfcaba6 100644 --- a/src/czar/CzarRegistry.cc +++ b/src/czar/CzarRegistry.cc @@ -49,8 +49,8 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.CzarRegistry"); namespace lsst::qserv::czar { CzarRegistry::CzarRegistry(std::shared_ptr const& czarConfig) : _czarConfig(czarConfig) { - // Begin periodically updating worker's status in the Replication System's registry - // in the detached thread. This will continue before the application gets terminated. + // Begin periodically updating worker's status in the Replication System's registry. + // This will continue until the application gets terminated. 
thread registryUpdateThread(&CzarRegistry::_registryUpdateLoop, this); _czarHeartbeatThrd = move(registryUpdateThread); @@ -149,13 +149,13 @@ CzarRegistry::WorkerContactMapPtr CzarRegistry::_buildMapFromJson(nlohmann::json int wPort = jsQserv.at("management-port").get(); uint64_t updateTimeInt = jsQserv.at("update-time-ms").get(); TIMEPOINT updateTime = TIMEPOINT(chrono::milliseconds(updateTimeInt)); - WorkerContactInfo wInfo(key, wHost, wManagementHost, wPort, updateTime); + auto wInfo = make_shared(key, wHost, wManagementHost, wPort, updateTime); LOGS(_log, LOG_LVL_DEBUG, __func__ << " wHost=" << wHost << " wPort=" << wPort << " updateTime=" << updateTimeInt); auto iter = wMap->find(key); if (iter != wMap->end()) { LOGS(_log, LOG_LVL_ERROR, __func__ << " duplicate key " << key << " in " << response); - if (!wInfo.sameContactInfo(iter->second)) { + if (!wInfo->sameContactInfo(*(iter->second))) { LOGS(_log, LOG_LVL_ERROR, __func__ << " incongruent key " << key << " in " << response); return nullptr; } @@ -180,7 +180,7 @@ bool CzarRegistry::_compareMap(WorkerContactMap const& other) const { if (iter == other.end()) { return false; } else { - if (!(iter->second.sameContactInfo(wInfo))) { + if (!(iter->second->sameContactInfo(*wInfo))) { return false; } } @@ -188,4 +188,11 @@ bool CzarRegistry::_compareMap(WorkerContactMap const& other) const { return true; } +string CzarRegistry::WorkerContactInfo::dump() const { + stringstream os; + os << "workerContactInfo{" + << "id=" << wId << " host=" << wHost << " mgHost=" << wManagementHost << " port=" << wPort << "}"; + return os.str(); +} + } // namespace lsst::qserv::czar diff --git a/src/czar/CzarRegistry.h b/src/czar/CzarRegistry.h index dd51d44098..27d20979cf 100644 --- a/src/czar/CzarRegistry.h +++ b/src/czar/CzarRegistry.h @@ -67,6 +67,8 @@ class CzarRegistry { ~CzarRegistry(); struct WorkerContactInfo { + using Ptr = std::shared_ptr; + WorkerContactInfo(std::string const& wId_, std::string const& wHost_, std::string 
const& wManagementHost_, int wPort_, TIMEPOINT updateTime_) : wId(wId_), @@ -85,11 +87,19 @@ class CzarRegistry { return (wId == other.wId && wHost == other.wHost && wManagementHost == other.wManagementHost && wPort == other.wPort); } + std::string dump() const; }; - using WorkerContactMap = std::unordered_map; + using WorkerContactMap = std::unordered_map; using WorkerContactMapPtr = std::shared_ptr; + /// Return _contactMap, the object that the returned pointer points to is + /// constant and no attempts should be made to change it. + WorkerContactMapPtr getWorkerContactMap() { + std::lock_guard lockG(_mapMtx); + return _contactMap; + } + private: CzarRegistry() = delete; CzarRegistry(std::shared_ptr const& czarConfig); diff --git a/src/czar/HttpCzarWorkerModule.cc b/src/czar/HttpCzarWorkerModule.cc new file mode 100644 index 0000000000..471bacee2e --- /dev/null +++ b/src/czar/HttpCzarWorkerModule.cc @@ -0,0 +1,169 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "czar/HttpCzarWorkerModule.h" + +// System headers +#include +#include + +// Qserv headers +#include "cconfig/CzarConfig.h" +#include "czar/Czar.h" +#include "qdisp/Executive.h" +#include "qdisp/UberJob.h" +#include "global/intTypes.h" +#include "http/Exceptions.h" +#include "http/RequestQuery.h" +#include "util/String.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using json = nlohmann::json; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.HttpCzarWorkerModule"); +} + +namespace lsst::qserv::czar { + +void HttpCzarWorkerModule::process(string const& context, shared_ptr const& req, + shared_ptr const& resp, string const& subModuleName, + http::AuthType const authType) { + HttpCzarWorkerModule module(context, req, resp); + module.execute(subModuleName, authType); +} + +HttpCzarWorkerModule::HttpCzarWorkerModule(string const& context, shared_ptr const& req, + shared_ptr const& resp) + : QhttpModule(context, req, resp) {} + +json HttpCzarWorkerModule::executeImpl(string const& subModuleName) { + string const func = string(__func__) + "[sub-module='" + subModuleName + "']"; + debug(func); + cconfig::CzarConfig::instance()->replicationInstanceId(); + enforceCzarName(func); + if (subModuleName == "QUERYJOB-ERROR") + return _queryJobError(); + else if (subModuleName == "QUERYJOB-READY") + return _queryJobReady(); + throw invalid_argument(context() + func + " unsupported sub-module"); +} + +json HttpCzarWorkerModule::_queryJobError() { + debug(__func__); + checkApiVersion(__func__, 34); + LOGS(_log, LOG_LVL_DEBUG, __func__ << " queryJobError json=" << body().objJson); + auto ret = _handleJobError(__func__); + return json::object(); +} + +json HttpCzarWorkerModule::_queryJobReady() { + debug(__func__); + checkApiVersion(__func__, 34); + LOGS(_log, LOG_LVL_DEBUG, __func__ << " queryJobReady json=" << body().objJson); + auto ret = _handleJobReady(__func__); + return ret; +} + +json 
HttpCzarWorkerModule::_handleJobError(string const& func) { + // Metadata-only responses for the file-based protocol should not have any data + + // Parse and verify the json message and then kill the UberJob. + json jsRet = {{"success", 0}, {"errortype", "unknown"}, {"note", "initialized"}}; + try { + // See qdisp::UberJob::runUberJob() for json message construction. + string const targetWorkerId = body().required("workerid"); + string const czarName = body().required("czar"); + qmeta::CzarId const czarId = body().required("czarid"); + QueryId const queryId = body().required("queryid"); + UberJobId const uberJobId = body().required("uberjobid"); + int const errorCode = body().required("errorCode"); + string const errorMsg = body().required("errorMsg"); + + // Find UberJob + qdisp::Executive::Ptr exec = czar::Czar::getCzar()->getExecutiveFromMap(queryId); + if (exec == nullptr) { + throw invalid_argument(string("HttpCzarWorkerModule::_handleJobError No executive for qid=") + + to_string(queryId) + " czar=" + to_string(czarId)); + } + qdisp::UberJob::Ptr uj = exec->findUberJob(uberJobId); + if (uj == nullptr) { + throw invalid_argument(string("HttpCzarWorkerModule::_handleJobError No UberJob for qid=") + + to_string(queryId) + " ujId=" + to_string(uberJobId) + + " czar=" + to_string(czarId)); + } + + auto importRes = uj->workerError(errorCode, errorMsg); + jsRet = importRes; + + } catch (std::invalid_argument const& iaEx) { + LOGS(_log, LOG_LVL_ERROR, + "HttpCzarWorkerModule::_handleJobError received " << iaEx.what() << " js=" << body().objJson); + jsRet = {{"success", 0}, {"errortype", "parse"}, {"note", iaEx.what()}}; + } + return jsRet; +} + +json HttpCzarWorkerModule::_handleJobReady(string const& func) { + // Metadata-only responses for the file-based protocol should not have any data + + // Parse and verify the json message and then have the uberjob import the file. 
+ json jsRet = {{"success", 1}, {"errortype", "unknown"}, {"note", "initialized"}}; + try { + // See qdisp::UberJob::runUberJob() for json message construction. + string const targetWorkerId = body().required("workerid"); + string const czarName = body().required("czar"); + qmeta::CzarId const czarId = body().required("czarid"); + QueryId const queryId = body().required("queryid"); + UberJobId const uberJobId = body().required("uberjobid"); + string const fileUrl = body().required("fileUrl"); + uint64_t const rowCount = body().required("rowCount"); + uint64_t const fileSize = body().required("fileSize"); + + // Find UberJob + qdisp::Executive::Ptr exec = czar::Czar::getCzar()->getExecutiveFromMap(queryId); + if (exec == nullptr) { + throw invalid_argument(string("HttpCzarWorkerModule::_handleJobReady No executive for qid=") + + to_string(queryId) + " czar=" + to_string(czarId)); + } + qdisp::UberJob::Ptr uj = exec->findUberJob(uberJobId); + if (uj == nullptr) { + throw invalid_argument(string("HttpCzarWorkerModule::_handleJobReady No UberJob for qid=") + + to_string(queryId) + " ujId=" + to_string(uberJobId) + + " czar=" + to_string(czarId)); + } + + auto importRes = uj->importResultFile(fileUrl, rowCount, fileSize); + jsRet = importRes; + + } catch (std::invalid_argument const& iaEx) { + LOGS(_log, LOG_LVL_ERROR, + "HttpCzarWorkerModule::_handleJobReady received " << iaEx.what() << " js=" << body().objJson); + jsRet = {{"success", 0}, {"errortype", "parse"}, {"note", iaEx.what()}}; + } + return jsRet; +} + +} // namespace lsst::qserv::czar diff --git a/src/czar/HttpCzarWorkerModule.h b/src/czar/HttpCzarWorkerModule.h new file mode 100644 index 0000000000..69f4a3fef4 --- /dev/null +++ b/src/czar/HttpCzarWorkerModule.h @@ -0,0 +1,82 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_CZAR_HTTPCZARWORKERMODULE_H +#define LSST_QSERV_CZAR_HTTPCZARWORKERMODULE_H + +// System headers +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "czar/QhttpModule.h" + +// Forward declarations +namespace lsst::qserv::qhttp { +class Request; +class Response; +} // namespace lsst::qserv::qhttp + +// This header declarations +namespace lsst::qserv::czar { + +/// This class is used to handle messages to this czar from the workers. 
+class HttpCzarWorkerModule : public QhttpModule { +public: + /// @note supported values for parameter 'subModuleName' are: + /// 'QUERYJOB-ERROR' - error in a QUERYJOB + /// 'QUERYJOB-READY' - an UberJob result file is ready to be collected + /// @throws std::invalid_argument for unknown values of parameter 'subModuleName' + static void process(std::string const& context, std::shared_ptr const& req, + std::shared_ptr const& resp, std::string const& subModuleName, + http::AuthType const authType = http::AuthType::NONE); + + HttpCzarWorkerModule() = delete; + HttpCzarWorkerModule(HttpCzarWorkerModule const&) = delete; + HttpCzarWorkerModule& operator=(HttpCzarWorkerModule const&) = delete; + + ~HttpCzarWorkerModule() final = default; + +protected: + nlohmann::json executeImpl(std::string const& subModuleName) final; + +private: + HttpCzarWorkerModule(std::string const& context, std::shared_ptr const& req, + std::shared_ptr const& resp); + + /// Called to handle message indicating this czar needs to handle an error on a worker. + nlohmann::json _queryJobError(); + + /// Called to indicate an UberJob is ready with data that needs to be collected. + nlohmann::json _queryJobReady(); + + /// Translates the message and calls the Czar to collect the data. + nlohmann::json _handleJobReady(std::string const& func); + + /// Translates the error and calls the Czar to take action.
+ nlohmann::json _handleJobError(std::string const& func); +}; + +} // namespace lsst::qserv::czar + +#endif // LSST_QSERV_CZAR_HTTPCZARWORKERMODULE_H diff --git a/src/czar/HttpSvc.cc b/src/czar/HttpSvc.cc index cddaf17b3f..b67330e27d 100644 --- a/src/czar/HttpSvc.cc +++ b/src/czar/HttpSvc.cc @@ -28,6 +28,7 @@ // Qserv headers #include "cconfig/CzarConfig.h" #include "czar/HttpMonitorModule.h" +#include "czar/HttpCzarWorkerModule.h" #include "http/MetaModule.h" #include "qhttp/Server.h" @@ -90,6 +91,16 @@ uint16_t HttpSvc::start() { [self](shared_ptr const& req, shared_ptr const& resp) { HttpMonitorModule::process(::serviceName, req, resp, "STATUS"); }}}); + _httpServerPtr->addHandlers( + {{"POST", "/queryjob-error", + [self](shared_ptr const& req, shared_ptr const& resp) { + HttpCzarWorkerModule::process(::serviceName, req, resp, "QUERYJOB-ERROR"); + }}}); + _httpServerPtr->addHandlers( + {{"POST", "/queryjob-ready", + [self](shared_ptr const& req, shared_ptr const& resp) { + HttpCzarWorkerModule::process(::serviceName, req, resp, "QUERYJOB-READY"); + }}}); _httpServerPtr->start(); // Initialize the I/O context and start the service threads. At this point diff --git a/src/czar/MessageTable.cc b/src/czar/MessageTable.cc index 088aac835b..23020dc214 100644 --- a/src/czar/MessageTable.cc +++ b/src/czar/MessageTable.cc @@ -35,7 +35,7 @@ #include "ccontrol/ConfigMap.h" #include "ccontrol/UserQuery.h" #include "czar/CzarErrors.h" -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" @@ -122,7 +122,7 @@ void MessageTable::_saveQueryMessages(ccontrol::UserQuery::Ptr const& userQuery) // Collect information about the query and put it in the message table. 
int msgCount = msgStore->messageCount(); for (int i = 0; i != msgCount; ++i) { - const qdisp::QueryMessage& qm = msgStore->getMessage(i); + const qmeta::QueryMessage& qm = msgStore->getMessage(i); std::string src = qm.msgSource; if (src == "COMPLETE") { ++completeCount; diff --git a/src/czar/testCzar.cc b/src/czar/testCzar.cc index cde8e59f2b..aad9fdfd31 100644 --- a/src/czar/testCzar.cc +++ b/src/czar/testCzar.cc @@ -37,8 +37,8 @@ #include "lsst/log/Log.h" // Qserv headers -#include "czar/CzarChunkMap.h" #include "qmeta/QMeta.h" +#include "czar/CzarChunkMap.h" namespace test = boost::test_tools; using namespace lsst::qserv; @@ -51,13 +51,13 @@ using namespace std; BOOST_AUTO_TEST_SUITE(Suite) -void insertIntoQChunkMap(qmeta::QMeta::ChunkMap& qChunkMap, string const& workerId, string const& dbName, +void insertIntoQChunkMap(qmeta::QMetaChunkMap& qChunkMap, string const& workerId, string const& dbName, string const& tableName, unsigned int chunkNum, size_t sz) { - qChunkMap.workers[workerId][dbName][tableName].push_back(qmeta::QMeta::ChunkMap::ChunkInfo{chunkNum, sz}); + qChunkMap.workers[workerId][dbName][tableName].push_back(qmeta::QMetaChunkMap::ChunkInfo{chunkNum, sz}); } -qmeta::QMeta::ChunkMap convertJsonToChunkMap(nlohmann::json const& jsChunks) { - qmeta::QMeta::ChunkMap qChunkMap; +qmeta::QMetaChunkMap convertJsonToChunkMap(nlohmann::json const& jsChunks) { + qmeta::QMetaChunkMap qChunkMap; for (auto const& [workerId, dbs] : jsChunks.items()) { for (auto const& [dbName, tables] : dbs.items()) { for (auto const& [tableName, chunks] : tables.items()) { @@ -186,17 +186,20 @@ BOOST_AUTO_TEST_CASE(CzarChunkMap) { } )"; + auto dbToFamily = make_shared(); + czar::CzarFamilyMap czFamMap(dbToFamily); + auto jsTest1 = nlohmann::json::parse(test1); - qmeta::QMeta::ChunkMap qChunkMap1 = convertJsonToChunkMap(jsTest1); - auto [chunkMapPtr, wcMapPtr] = czar::CzarChunkMap::makeNewMaps(qChunkMap1); - czar::CzarChunkMap::verify(*chunkMapPtr, *wcMapPtr); // Throws on 
failure. - LOGS(_log, LOG_LVL_DEBUG, "CzarChunkMap test 1 passed"); + qmeta::QMetaChunkMap qChunkMap1 = convertJsonToChunkMap(jsTest1); + auto familyMap = czFamMap.makeNewMaps(qChunkMap1); + czar::CzarFamilyMap::verify(familyMap); // Throws on failure. + LOGS(_log, LOG_LVL_DEBUG, "CzarFamilyMap test 1 passed"); auto jsTest2 = nlohmann::json::parse(test2); - qmeta::QMeta::ChunkMap qChunkMap2 = convertJsonToChunkMap(jsTest2); - tie(chunkMapPtr, wcMapPtr) = czar::CzarChunkMap::makeNewMaps(qChunkMap2); - czar::CzarChunkMap::verify(*chunkMapPtr, *wcMapPtr); // Throws on failure. - LOGS(_log, LOG_LVL_DEBUG, "CzarChunkMap test 2 passed"); + qmeta::QMetaChunkMap qChunkMap2 = convertJsonToChunkMap(jsTest2); + auto familyMap2 = czFamMap.makeNewMaps(qChunkMap2); + czar::CzarFamilyMap::verify(familyMap2); // Throws on failure. + LOGS(_log, LOG_LVL_DEBUG, "CzarFamilyMap test 2 passed"); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/global/intTypes.h b/src/global/intTypes.h index 4182544f7f..c3a6f7fb07 100644 --- a/src/global/intTypes.h +++ b/src/global/intTypes.h @@ -37,6 +37,8 @@ typedef std::vector Int32Vector; /// Typedef for Query ID in query metadata. typedef std::uint64_t QueryId; +typedef std::int64_t JobId; +typedef JobId UberJobId; // These must be the same type. /// Class to provide a consistent format for QueryIds in the log file class QueryIdHelper { @@ -45,15 +47,15 @@ class QueryIdHelper { /// @parameter qid - query id number. /// @parameter invalid - true, qid is not a valid user query id. static std::string makeIdStr(QueryId qid, bool invalid = false) { - if (invalid) return "QI=?:"; - return "QI=" + std::to_string(qid) + ":"; + if (invalid) return "QID=?:"; + return "QID=" + std::to_string(qid) + ":"; } /// Returns a standardized user query id string with jobId. /// @parameter qid - query id number. /// @parameter jobId - the job id number. /// @parameter invalid - true, qid is not a valid user query id. 
- static std::string makeIdStr(QueryId qid, int jobId, bool invalid = false) { + static std::string makeIdStr(QueryId qid, JobId jobId, bool invalid = false) { if (invalid) return makeIdStr(qid, true) + "?;"; return makeIdStr(qid) + std::to_string(jobId) + ";"; } diff --git a/src/http/MetaModule.cc b/src/http/MetaModule.cc index f64572b08d..4422ade6dd 100644 --- a/src/http/MetaModule.cc +++ b/src/http/MetaModule.cc @@ -37,7 +37,12 @@ string const adminAuthKey; namespace lsst::qserv::http { -unsigned int const MetaModule::version = 35; +// MetaModule::version is the ultimate source of truth for the version number. +// All version values must match. Other version locations are in: +// class ReplicationInterface repl_api_version +// in src/admin/python/lsst/qserv/admin/replicationInterface.py +// RestAPIVersion in src/www/qserv/js/Common.js +unsigned int const MetaModule::version = 35; // TODO:UJ this may need to change when merging the branch. void MetaModule::process(string const& context, nlohmann::json const& info, shared_ptr const& req, shared_ptr const& resp, diff --git a/src/http/Module.h b/src/http/Module.h index 009d2a1923..0a8d0ac957 100644 --- a/src/http/Module.h +++ b/src/http/Module.h @@ -208,6 +208,8 @@ class Module { */ virtual void sendResponse(std::string const& content, std::string const& contentType) = 0; + std::string authKey() const { return _authKey; } + private: /** * Pull the raw request body and translate it into a JSON object. diff --git a/src/http/RequestBodyJSON.h b/src/http/RequestBodyJSON.h index 896250d32e..a3363e3416 100644 --- a/src/http/RequestBodyJSON.h +++ b/src/http/RequestBodyJSON.h @@ -32,6 +32,8 @@ // This header declarations namespace lsst::qserv::http { +// TODO:UJ This should be renamed RequestBodyJson, coding standards. + /** * Class RequestBodyJSON represents the request body parsed into a JSON object.
* This type of an object is only available for requests that have the following @@ -42,6 +44,17 @@ class RequestBodyJSON { /// parsed body of the request nlohmann::json objJson = nlohmann::json::object(); + RequestBodyJSON() = default; + RequestBodyJSON(RequestBodyJSON const&) = default; + RequestBodyJSON& operator=(RequestBodyJSON const&) = default; + + ~RequestBodyJSON() = default; + + /// Make a new RequestBody based on `js` + /// TODO:UJ This would be much more efficient if this class had objJson defined as + /// a const reference or pointer to const, but implementation is likely ugly. + RequestBodyJSON(nlohmann::json const& js) : objJson(js) {} + /** * Check if the specified parameter is present in the input JSON object. * @param obj JSON object to be inspected. @@ -73,8 +86,11 @@ class RequestBodyJSON { throw std::invalid_argument("RequestBodyJSON::" + std::string(__func__) + "[static] parameter 'obj' is not a valid JSON object"); } - if (obj.find(name) != obj.end()) return obj[name]; - throw std::invalid_argument("RequestBodyJSON::" + std::string(__func__) + + + if (auto const iter = obj.find(name); iter != obj.end()) { + return *iter; + } + throw std::invalid_argument("RequestBody::" + std::string(__func__) + "[static] required parameter " + name + " is missing in the request body"); } diff --git a/src/proto/ScanTableInfo.h b/src/proto/ScanTableInfo.h index 634953e656..f2dacec61a 100644 --- a/src/proto/ScanTableInfo.h +++ b/src/proto/ScanTableInfo.h @@ -38,6 +38,7 @@ namespace lsst::qserv::proto { struct ScanTableInfo { using ListOf = std::vector; + ScanTableInfo() = default; ScanTableInfo(std::string const& db_, std::string const& table_) : db(db_), table(table_) {} ScanTableInfo(std::string const& db_, std::string const& table_, bool lockInMemory_, int scanRating_) : db{db_}, table{table_}, lockInMemory{lockInMemory_}, scanRating{scanRating_} {} @@ -47,6 +48,8 @@ struct ScanTableInfo { lockInMemory{scanTbl.lockinmemory()}, scanRating{scanTbl.scanrating()} 
{} + ScanTableInfo(ScanTableInfo const&) = default; + /// Copy contents of this object into a TaskMsg_ScanTable object. void copyToScanTable(TaskMsg_ScanTable* msgScanTbl) const { msgScanTbl->set_db(db); @@ -67,6 +70,9 @@ struct ScanInfo { /// Threshold priority values. Scan priorities are not limited to these values. enum Rating { FASTEST = 0, FAST = 10, MEDIUM = 20, SLOW = 30, SLOWEST = 100 }; + ScanInfo() = default; + ScanInfo(ScanInfo const&) = default; + void sortTablesSlowestFirst(); int compareTables(ScanInfo const& rhs); diff --git a/src/proto/worker.proto b/src/proto/worker.proto index 08f75297ef..76d607997f 100644 --- a/src/proto/worker.proto +++ b/src/proto/worker.proto @@ -29,6 +29,7 @@ option cc_enable_arenas = true; package lsst.qserv.proto; +// TODO:UJ delete when xrootd removed. ResponseSummary will need to be kept. // Query message sent to worker // One of these Task objects should be sent. message TaskMsg { diff --git a/src/qdisp/CMakeLists.txt b/src/qdisp/CMakeLists.txt index 567266888c..e0aa446672 100644 --- a/src/qdisp/CMakeLists.txt +++ b/src/qdisp/CMakeLists.txt @@ -5,12 +5,12 @@ target_sources(qdisp PRIVATE ChunkMeta.cc CzarStats.cc Executive.cc + JobBase.cc JobDescription.cc JobQuery.cc - JobStatus.cc - MessageStore.cc QdispPool.cc QueryRequest.cc + UberJob.cc XrdSsiMocks.cc ) diff --git a/src/qdisp/Executive.cc b/src/qdisp/Executive.cc index defcf56541..97ebf2d6a0 100644 --- a/src/qdisp/Executive.cc +++ b/src/qdisp/Executive.cc @@ -58,20 +58,24 @@ // Qserv headers #include "cconfig/CzarConfig.h" +#include "ccontrol/MergingHandler.h" #include "ccontrol/msgCode.h" +#include "ccontrol/TmpTableName.h" +#include "ccontrol/UserQuerySelect.h" #include "global/LogContext.h" #include "global/ResourceUnit.h" #include "qdisp/CzarStats.h" #include "qdisp/JobQuery.h" -#include "qdisp/MessageStore.h" #include "qdisp/QueryRequest.h" #include "qdisp/ResponseHandler.h" #include "qdisp/XrdSsiMocks.h" #include "query/QueryContext.h" #include
"qproc/QuerySession.h" #include "qmeta/Exceptions.h" +#include "qmeta/MessageStore.h" #include "qmeta/QStatus.h" #include "query/SelectStmt.h" +#include "rproc/InfileMerger.h" #include "util/AsyncTimer.h" #include "util/Bug.h" #include "util/EventThread.h" @@ -99,7 +103,7 @@ namespace lsst::qserv::qdisp { //////////////////////////////////////////////////////////////////////// // class Executive implementation //////////////////////////////////////////////////////////////////////// -Executive::Executive(ExecutiveConfig const& c, shared_ptr const& ms, +Executive::Executive(ExecutiveConfig const& c, shared_ptr const& ms, SharedResources::Ptr const& sharedResources, shared_ptr const& qStatus, shared_ptr const& querySession) : _config(c), @@ -114,8 +118,13 @@ Executive::Executive(ExecutiveConfig const& c, shared_ptr const& m } Executive::~Executive() { + LOGS(_log, LOG_LVL_DEBUG, "Executive::~Executive() " << getIdStr()); qdisp::CzarStats::get()->deleteQuery(); qdisp::CzarStats::get()->deleteJobs(_incompleteJobs.size()); + // Remove this executive from the map. + if (czar::Czar::getCzar()->getExecutiveFromMap(getId()) != nullptr) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) + " pointer in map should be invalid QID=" << getId()); + } // Real XrdSsiService objects are unowned, but mocks are allocated in _setup. delete dynamic_cast(_xrdSsiService); if (_asyncTimer != nullptr) { @@ -124,7 +133,7 @@ Executive::~Executive() { } } -Executive::Ptr Executive::create(ExecutiveConfig const& c, shared_ptr const& ms, +Executive::Ptr Executive::create(ExecutiveConfig const& c, shared_ptr const& ms, SharedResources::Ptr const& sharedResources, shared_ptr const& qMeta, shared_ptr const& querySession, @@ -142,17 +151,20 @@ Executive::Ptr Executive::create(ExecutiveConfig const& c, shared_ptrczarStatsUpdateIvalSec(); if (czarStatsUpdateIvalSec > 0) { + // AsyncTimer has a 'self' keep alive in AsyncTimer::start() that keeps it safe when + // this Executive is deleted. 
ptr->_asyncTimer = util::AsyncTimer::create( asioIoService, std::chrono::milliseconds(czarStatsUpdateIvalSec * 1000), [self = std::weak_ptr(ptr)](auto expirationIvalMs) -> bool { auto ptr = self.lock(); - LOGS(_log, LOG_LVL_DEBUG, - "Executive::" << __func__ << " expirationIvalMs: " << expirationIvalMs.count() - << " ms"); + string const msg = string("Executive::") + __func__ + + " expirationIvalMs: " + to_string(expirationIvalMs.count()) + " ms"; if (ptr != nullptr) { ptr->_updateStats(); + LOGS(_log, LOG_LVL_DEBUG, msg + " " + ptr->getIdStr()); return true; } + LOGS(_log, LOG_LVL_DEBUG, msg); return false; }); ptr->_asyncTimer->start(); @@ -166,23 +178,39 @@ void Executive::_updateStats() const { } void Executive::setQueryId(QueryId id) { + if (_queryIdSet.exchange(true) == true) { + throw util::Bug(ERR_LOC, "Executive::setQueryId called more than once _id=" + to_string(_id) + + " id=" + to_string(id)); + } _id = id; _idStr = QueryIdHelper::makeIdStr(_id); + + // Insert into the global executive map. + { czar::Czar::getCzar()->insertExecutive(_id, shared_from_this()); } qdisp::CzarStats::get()->trackQueryProgress(_id); } +UberJob::Ptr Executive::findUberJob(UberJobId ujId) { + lock_guard lgMap(_uberJobsMapMtx); + auto iter = _uberJobsMap.find(ujId); + if (iter == _uberJobsMap.end()) { + return nullptr; + } + return iter->second; +} + /// Add a new job to executive queue, if not already in. Not thread-safe. /// JobQuery::Ptr Executive::add(JobDescription::Ptr const& jobDesc) { JobQuery::Ptr jobQuery; { // Create the JobQuery and put it in the map. 
- JobStatus::Ptr jobStatus = make_shared(); + auto jobStatus = make_shared(); Ptr thisPtr = shared_from_this(); MarkCompleteFunc::Ptr mcf = make_shared(thisPtr, jobDesc->id()); jobQuery = JobQuery::create(thisPtr, jobDesc, jobStatus, mcf, _id); - QSERV_LOGCONTEXT_QUERY_JOB(jobQuery->getQueryId(), jobQuery->getIdInt()); + QSERV_LOGCONTEXT_QUERY_JOB(jobQuery->getQueryId(), jobQuery->getJobId()); { lock_guard lock(_cancelled.getMutex()); @@ -197,10 +225,12 @@ JobQuery::Ptr Executive::add(JobDescription::Ptr const& jobDesc) { return jobQuery; } - if (!_track(jobQuery->getIdInt(), jobQuery)) { + if (!_track(jobQuery->getJobId(), jobQuery)) { LOGS(_log, LOG_LVL_ERROR, "Executive ignoring duplicate track add"); return jobQuery; } + + _addToChunkJobMap(jobQuery); } if (_empty.exchange(false)) { @@ -209,14 +239,16 @@ JobQuery::Ptr Executive::add(JobDescription::Ptr const& jobDesc) { ++_requestCount; } - QSERV_LOGCONTEXT_QUERY_JOB(jobQuery->getQueryId(), jobQuery->getIdInt()); + QSERV_LOGCONTEXT_QUERY_JOB(jobQuery->getQueryId(), jobQuery->getJobId()); - LOGS(_log, LOG_LVL_DEBUG, "Executive::add with path=" << jobDesc->resource().path()); + return jobQuery; +} + +void Executive::runJobQuery(JobQuery::Ptr const& jobQuery) { bool started = jobQuery->runJob(); if (!started && isLimitRowComplete()) { - markCompleted(jobQuery->getIdInt(), false); + markCompleted(jobQuery->getJobId(), false); } - return jobQuery; } void Executive::queueJobStart(PriorityCommand::Ptr const& cmd) { @@ -228,6 +260,32 @@ void Executive::queueJobStart(PriorityCommand::Ptr const& cmd) { } } +void Executive::queueFileCollect(PriorityCommand::Ptr const& cmd) { + if (_scanInteractive) { + _qdispPool->queCmd(cmd, 3); + } else { + _qdispPool->queCmd(cmd, 4); + } +} + +void Executive::runUberJob(std::shared_ptr const& uberJob) { + /// TODO:UJ delete useqdisppool, only set to false if problems during testing + bool const useqdisppool = true; + if (useqdisppool) { + auto runUberJobFunc = [uberJob](util::CmdData*) 
{ uberJob->runUberJob(); }; + + auto cmd = qdisp::PriorityCommand::Ptr(new qdisp::PriorityCommand(runUberJobFunc)); + _jobStartCmdList.push_back(cmd); + if (_scanInteractive) { + _qdispPool->queCmd(cmd, 0); + } else { + _qdispPool->queCmd(cmd, 1); + } + } else { + uberJob->runUberJob(); + } +} + void Executive::waitForAllJobsToStart() { LOGS(_log, LOG_LVL_INFO, "waitForAllJobsToStart"); // Wait for each command to start. @@ -243,12 +301,11 @@ void Executive::waitForAllJobsToStart() { // If the executive has not been cancelled, then we simply start the query. // @return true if query was actually started (i.e. we were not cancelled) -// +// // TODO:UJ delete this function bool Executive::startQuery(shared_ptr const& jobQuery) { lock_guard lock(_cancelled.getMutex()); - // If we have been cancelled, then return false. - // + // If this has been cancelled, then return false. if (_cancelled) return false; // Construct a temporary resource object to pass to ProcessRequest(). @@ -271,11 +328,67 @@ bool Executive::startQuery(shared_ptr const& jobQuery) { return true; } +Executive::ChunkIdJobMapType Executive::unassignedChunksInQuery() { + lock_guard lck(_chunkToJobMapMtx); + + ChunkIdJobMapType unassignedMap; + for (auto const& [key, jobPtr] : _chunkToJobMap) { + if (!jobPtr->isInUberJob()) { + unassignedMap[key] = jobPtr; + } + } + return unassignedMap; +} + +void Executive::addUberJobs(std::vector> const& uJobsToAdd) { + lock_guard lck(_uberJobsMapMtx); + for (auto const& uJob : uJobsToAdd) { + UberJobId ujId = uJob->getJobId(); + _uberJobsMap[ujId] = uJob; + } +} + +string Executive::dumpUberJobCounts() const { + stringstream os; + os << "exec=" << getIdStr(); + int totalJobs = 0; + { + lock_guard ujmLck(_uberJobsMapMtx); + for (auto const& [ujKey, ujPtr] : _uberJobsMap) { + int jobCount = ujPtr->getJobCount(); + totalJobs += jobCount; + os << "{" << ujKey << ":" << ujPtr->getIdStr() << " jobCount=" << jobCount << "}"; + } + } + { + lock_guard jmLck(_jobMapMtx); + os 
<< " ujTotalJobs=" << totalJobs << " execJobs=" << _jobMap.size(); + } + return os.str(); +} + +void Executive::assignJobsToUberJobs() { + auto uqs = _userQuerySelect.lock(); + if (uqs != nullptr) { + uqs->buildAndSendUberJobs(); + } +} + +void Executive::addMultiError(int errorCode, std::string const& errorMsg, int errorState) { + util::Error err(errorCode, errorMsg, errorState); + { + lock_guard lock(_errorsMutex); + _multiError.push_back(err); + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) + " multiError:" << _multiError.size() << ":" << _multiError); + } +} + /// Add a JobQuery to this Executive. /// Return true if it was successfully added to the map. /// bool Executive::_addJobToMap(JobQuery::Ptr const& job) { - auto entry = pair(job->getIdInt(), job); + auto entry = pair(job->getJobId(), job); lock_guard lockJobMap(_jobMapMtx); bool res = _jobMap.insert(entry).second; _totalJobs = _jobMap.size(); @@ -289,9 +402,10 @@ bool Executive::join() { // Okay to merge. probably not the Executive's responsibility struct successF { static bool func(Executive::JobMap::value_type const& entry) { - JobStatus::Info const& esI = entry.second->getStatus()->getInfo(); + qmeta::JobStatus::Info const& esI = entry.second->getStatus()->getInfo(); LOGS(_log, LOG_LVL_TRACE, "entry state:" << (void*)entry.second.get() << " " << esI); - return (esI.state == JobStatus::RESPONSE_DONE) || (esI.state == JobStatus::COMPLETE); + return (esI.state == qmeta::JobStatus::RESPONSE_DONE) || + (esI.state == qmeta::JobStatus::COMPLETE); } }; @@ -318,7 +432,7 @@ bool Executive::join() { return _empty || isLimitRowComplete(); } -void Executive::markCompleted(int jobId, bool success) { +void Executive::markCompleted(JobId jobId, bool success) { ResponseHandler::Error err; string idStr = QueryIdHelper::makeIdStr(_id, jobId); LOGS(_log, LOG_LVL_DEBUG, "Executive::markCompleted " << success); @@ -349,13 +463,10 @@ void Executive::markCompleted(int jobId, bool success) { lock_guard lockJobMap(_jobMapMtx); 
auto job = _jobMap[jobId]; string id = job->getIdStr() + "<>" + idStr; - auto jState = job->getStatus()->getInfo().state; + // Don't overwrite existing error states. - if (jState != JobStatus::CANCEL && jState != JobStatus::RESPONSE_ERROR && - jState != JobStatus::RESULT_ERROR && jState != JobStatus::MERGE_ERROR) { - job->getStatus()->updateInfo(id, JobStatus::RESULT_ERROR, "EXECFAIL", err.getCode(), - err.getMsg()); - } + job->getStatus()->updateInfoNoErrorOverwrite(id, qmeta::JobStatus::RESULT_ERROR, "EXECFAIL", + err.getCode(), err.getMsg()); } { lock_guard lock(_errorsMutex); @@ -376,11 +487,11 @@ void Executive::markCompleted(int jobId, bool success) { void Executive::squash() { bool alreadyCancelled = _cancelled.exchange(true); if (alreadyCancelled) { - LOGS(_log, LOG_LVL_DEBUG, "Executive::squash() already cancelled! refusing."); + LOGS(_log, LOG_LVL_DEBUG, "Executive::squash() already cancelled! refusing. qid=" << getId()); return; } - LOGS(_log, LOG_LVL_INFO, "Executive::squash Trying to cancel all queries..."); + LOGS(_log, LOG_LVL_INFO, "Executive::squash Trying to cancel all queries... qid=" << getId()); deque jobsToCancel; { lock_guard lockJobMap(_jobMapMtx); @@ -392,6 +503,13 @@ void Executive::squash() { for (auto const& job : jobsToCancel) { job->cancel(); } + + // TODO:UJ - Send a message to all workers saying this czarId + queryId is cancelled. + // The workers will just mark all associated tasks as cancelled, and that should be it. + // Any message to this czar about this query should result in an error sent back to + // the worker as soon it can't locate an executive or the executive says cancelled. + bool const deleteResults = true; + sendWorkerCancelMsg(deleteResults); LOGS(_log, LOG_LVL_DEBUG, "Executive::squash done"); } @@ -409,7 +527,8 @@ void Executive::_squashSuperfluous() { JobQuery::Ptr jq = jobEntry.second; // It's important that none of the cancelled queries // try to remove their rows from the result. 
- if (jq->getStatus()->getInfo().state != JobStatus::COMPLETE) { + if (jq->getStatus()->getInfo().state != qmeta::JobStatus::COMPLETE && + jq->getStatus()->getInfo().state != qmeta::JobStatus::CANCEL) { jobsToCancel.push_back(jobEntry.second); } } @@ -418,9 +537,22 @@ for (auto const& job : jobsToCancel) { job->cancel(true); } + + bool const keepResults = false; + sendWorkerCancelMsg(keepResults); LOGS(_log, LOG_LVL_DEBUG, "Executive::squashSuperfluous done"); } +void Executive::sendWorkerCancelMsg(bool deleteResults) { + // TODO:UJ need to send a message to the worker that the query is cancelled and all result files + // should be deleted + LOGS(_log, LOG_LVL_ERROR, + "TODO:UJ NEED CODE Executive::sendWorkerCancelMsg to send messages to workers to cancel this czarId " + "+ " + "queryId. " + << deleteResults); +} + int Executive::getNumInflight() const { unique_lock lock(_incompleteJobsMutex); return _incompleteJobs.size(); } @@ -618,6 +750,16 @@ void Executive::_waitAllUntilEmpty() { } } +void Executive::_addToChunkJobMap(JobQuery::Ptr const& job) { + int chunkId = job->getDescription()->resource().chunk(); + auto entry = pair(chunkId, job); + lock_guard lck(_chunkToJobMapMtx); + bool inserted = _chunkToJobMap.insert(entry).second; + if (!inserted) { + throw util::Bug(ERR_LOC, "map insert FAILED ChunkId=" + to_string(chunkId) + " already existed"); + } +} + void Executive::_setupLimit() { // Figure out the limit situation. auto qSession = _querySession.lock(); @@ -653,12 +795,12 @@ void Executive::checkLimitRowComplete() { } ostream& operator<<(ostream& os, Executive::JobMap::value_type const& v) { - JobStatus::Ptr status = v.second->getStatus(); + auto const& status = v.second->getStatus(); os << v.first << ": " << *status; return os; } -/// precondition: _requestersMutex is held by current thread. +/// precondition: _incompleteJobsMutex is held by current thread.
void Executive::_printState(ostream& os) { for (auto const& entry : _incompleteJobs) { JobQuery::Ptr job = entry.second; diff --git a/src/qdisp/Executive.h b/src/qdisp/Executive.h index f44abdc17c..1d95e5a9ca 100644 --- a/src/qdisp/Executive.h +++ b/src/qdisp/Executive.h @@ -30,7 +30,6 @@ #include #include #include -#include #include // Third-party headers @@ -41,24 +40,33 @@ #include "global/ResourceUnit.h" #include "global/stringTypes.h" #include "qdisp/JobDescription.h" -#include "qdisp/JobStatus.h" #include "qdisp/ResponseHandler.h" #include "qdisp/SharedResources.h" #include "qdisp/QdispPool.h" +#include "qdisp/UberJob.h" +#include "qmeta/JobStatus.h" #include "util/EventThread.h" #include "util/InstanceCount.h" #include "util/MultiError.h" #include "util/threadSafe.h" #include "util/ThreadPool.h" +// TODO:UJ replace with better enable/disable feature, or just use only UberJobs +#define uberJobsEnabled 1 + // Forward declarations class XrdSsiService; namespace lsst::qserv { +namespace ccontrol { +class UserQuerySelect; +} + namespace qmeta { +class MessageStore; class QStatus; -} +} // namespace qmeta namespace qproc { class QuerySession; @@ -66,9 +74,13 @@ class QuerySession; namespace qdisp { class JobQuery; -class MessageStore; +class UberJob; } // namespace qdisp +namespace rproc { +class InfileMerger; +} + namespace util { class AsyncTimer; } @@ -86,17 +98,18 @@ struct ExecutiveConfig { static std::string getMockStr() { return "Mock"; } }; -/// class Executive manages the execution of jobs for a UserQuery, while -/// maintaining minimal information about the jobs themselves. +/// class Executive manages the execution of jobs for a UserQuery. class Executive : public std::enable_shared_from_this { public: typedef std::shared_ptr Ptr; typedef std::unordered_map> JobMap; + typedef int ChunkIdType; + typedef std::map> ChunkIdJobMapType; /// Construct an Executive. 
/// If c->serviceUrl == ExecutiveConfig::getMockStr(), then use XrdSsiServiceMock /// instead of a real XrdSsiService - static Executive::Ptr create(ExecutiveConfig const& c, std::shared_ptr const& ms, + static Executive::Ptr create(ExecutiveConfig const& c, std::shared_ptr const& ms, SharedResources::Ptr const& sharedResources, std::shared_ptr const& qMeta, std::shared_ptr const& querySession, @@ -104,12 +117,33 @@ class Executive : public std::enable_shared_from_this { ~Executive(); + std::string cName(const char* funcName = "") { return std::string("Executive::") + funcName; } + + /// Set the UserQuerySelect object for this query so this Executive can ask it to make new + /// UberJobs in the future, if needed. + void setUserQuerySelect(std::shared_ptr const& uqs) { _userQuerySelect = uqs; } + + /// Return a map that only contains Jobs not assigned to an UberJob. + ChunkIdJobMapType unassignedChunksInQuery(); + + /// Find the UberJob with `ujId`. + std::shared_ptr findUberJob(UberJobId ujId); + /// Add an item with a reference number std::shared_ptr add(JobDescription::Ptr const& s); + /// TODO:UJ - to be deleted + void runJobQuery(std::shared_ptr const& jobQuery); + + // Queue `uberJob` to be run using the QDispPool. + void runUberJob(std::shared_ptr const& uberJob); + /// Queue a job to be sent to a worker so it can be started. void queueJobStart(PriorityCommand::Ptr const& cmd); + /// Queue `cmd`, using the QDispPool, so it can be used to collect the result file. + void queueFileCollect(PriorityCommand::Ptr const& cmd); + /// Waits for all jobs on _jobStartCmdList to start. This should not be called /// before ALL jobs have been added to the pool. void waitForAllJobsToStart(); @@ -119,14 +153,18 @@ class Executive : public std::enable_shared_from_this { bool join(); /// Notify the executive that an item has completed - void markCompleted(int refNum, bool success); + void markCompleted(JobId refNum, bool success); /// Squash all the jobs. 
void squash(); bool getEmpty() { return _empty; } + /// These values cannot be set until information has been collected from + /// QMeta, which isn't called until some basic checks on the user query + /// have passed. void setQueryId(QueryId id); + QueryId getId() const { return _id; } std::string const& getIdStr() const { return _idStr; } @@ -145,7 +183,7 @@ class Executive : public std::enable_shared_from_this { std::shared_ptr getQdispPool() { return _qdispPool; } - bool startQuery(std::shared_ptr const& jobQuery); + bool startQuery(std::shared_ptr const& jobQuery); // TODO:UJ delete /// Add 'rowCount' to the total number of rows in the result table. void addResultRows(int64_t rowCount); @@ -167,8 +205,43 @@ class Executive : public std::enable_shared_from_this { /// @see python module lsst.qserv.czar.proxy.unlock() void updateProxyMessages(); + /// Add UbjerJobs to this user query. + void addUberJobs(std::vector> const& jobsToAdd); + + /// Call UserQuerySelect::buildAndSendUberJobs make new UberJobs for + /// unassigned jobs. + void assignJobsToUberJobs(); + + int getTotalJobs() { return _totalJobs; } + + /// Set `_failedUberJob` to `val`; Setting this to true is a flag + /// that indicates to the Czar::_monitor that this Executive + /// probably has unassigned jobs that need to be placed in + /// new UberJobs. This `val` should only be set false by + /// Czar::_monitor(). + void setFlagFailedUberJob(bool val) { _failedUberJob = val; } + + /// Add an error code and message that may be displayed to the user. + void addMultiError(int errorCode, std::string const& errorMsg, int errState); + + std::string dumpUberJobCounts() const; + + // The below value should probably be based on the user query, with longer sleeps for slower queries. + int getAttemptSleepSeconds() const { return 15; } // As above or until added to config file. 
+ int getMaxAttempts() const { return 5; } // Should be set by config + + /// Calling this indicates the executive is ready to create and execute UberJobs. + void setReadyToExecute() { _readyToExecute = true; } + + /// Returns true if the executive is ready to create and execute UberJobs. + bool isReadyToExecute() { return _readyToExecute; } + + /// Send a message to all workers to cancel this query. + /// @param deleteResults - If true, delete all result files for this query on the workers. + void sendWorkerCancelMsg(bool deleteResults); + private: - Executive(ExecutiveConfig const& c, std::shared_ptr const& ms, + Executive(ExecutiveConfig const& c, std::shared_ptr const& ms, SharedResources::Ptr const& sharedResources, std::shared_ptr const& qStatus, std::shared_ptr const& querySession); @@ -198,7 +271,7 @@ class Executive : public std::enable_shared_from_this { ExecutiveConfig _config; ///< Personal copy of config std::atomic _empty{true}; - std::shared_ptr _messageStore; ///< MessageStore for logging + std::shared_ptr _messageStore; ///< MessageStore for logging /// RPC interface, static to avoid getting every time a user query starts and separate /// from _xrdSsiService to avoid conflicts with XrdSsiServiceMock. @@ -225,11 +298,13 @@ class Executive : public std::enable_shared_from_this { mutable std::mutex _errorsMutex; std::condition_variable _allJobsComplete; + // TODO:UJ see what it takes to make this a normal mutex, before + // xrootd resulted in things being called in difficult to predict + // ways. That shouldn't be an issue any more. mutable std::recursive_mutex _jobMapMtx; - QueryId _id{0}; ///< Unique identifier for this query. + QueryId _id = 0; ///< Unique identifier for this query. std::string _idStr{QueryIdHelper::makeIdStr(0, true)}; - // util::InstanceCount _instC{"Executive"}; std::shared_ptr _qMeta; /// Last time Executive updated QMeta, defaults to epoch for clock. 
@@ -240,6 +315,18 @@ class Executive : public std::enable_shared_from_this { bool _scanInteractive = false; ///< true for interactive scans. + // Add a job to the _chunkToJobMap + // TODO:UJ This may need review as large changes were made to this part of the code. + // code is no longer destructive to _chunkToJobMap + void _addToChunkJobMap(std::shared_ptr const& job); + std::mutex _chunkToJobMapMtx; ///< protects _chunkToJobMap + ChunkIdJobMapType _chunkToJobMap; ///< Map of jobs ordered by chunkId + + /// Map of all UberJobs. Failed UberJobs remain in the map as new ones are created + /// to handle failed UberJobs. + std::map> _uberJobsMap; + mutable std::mutex _uberJobsMapMtx; ///< protects _uberJobs. + /// True if enough rows were read to satisfy a LIMIT query with /// no ORDER BY or GROUP BY clauses. std::atomic _limitRowComplete{false}; @@ -254,13 +341,26 @@ class Executive : public std::enable_shared_from_this { /// Number of time data has been ignored for for this user query. std::atomic _dataIgnoredCount{0}; + + std::atomic _queryIdSet{false}; ///< Set to true when _id is set. + + /// Weak pointer to the UserQuerySelect object for this query. + std::weak_ptr _userQuerySelect; + + /// If this is true, there are probably jobs that need to + /// be reassigned to new UberJobs. + std::atomic _failedUberJob{false}; + + /// Flag that is set to true when ready to create and run UberJobs. + std::atomic _readyToExecute{false}; }; +/// TODO:UJ delete - MarkCompleteFunc is not needed with uberjobs. 
class MarkCompleteFunc { public: typedef std::shared_ptr Ptr; - MarkCompleteFunc(Executive::Ptr const& e, int jobId) : _executive(e), _jobId(jobId) {} + MarkCompleteFunc(Executive::Ptr const& e, JobId jobId) : _executive(e), _jobId(jobId) {} virtual ~MarkCompleteFunc() {} virtual void operator()(bool success) { @@ -272,7 +372,7 @@ class MarkCompleteFunc { private: std::weak_ptr _executive; - int _jobId; + JobId _jobId; }; } // namespace qdisp diff --git a/src/qdisp/JobBase.cc b/src/qdisp/JobBase.cc new file mode 100644 index 0000000000..a5ef5a8c8f --- /dev/null +++ b/src/qdisp/JobBase.cc @@ -0,0 +1,54 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "qdisp/JobBase.h" + +// System headers +#include + +// Qserv headers + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.JobBase"); +} + +namespace lsst { namespace qserv { namespace qdisp { + +std::ostream& JobBase::dumpOS(std::ostream& os) const { + os << "JobBase no data members"; + return os; +} + +std::string JobBase::dump() const { + std::ostringstream os; + dumpOS(os); + return os.str(); +} + +std::ostream& operator<<(std::ostream& os, JobBase const& jb) { return jb.dumpOS(os); } + +}}} // namespace lsst::qserv::qdisp diff --git a/src/qdisp/JobBase.h b/src/qdisp/JobBase.h new file mode 100644 index 0000000000..e5df5fc2ab --- /dev/null +++ b/src/qdisp/JobBase.h @@ -0,0 +1,78 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_QDISP_JOBBASE_H +#define LSST_QSERV_QDISP_JOBBASE_H + +// System headers +#include +#include + +// Qserv headers +#include "global/intTypes.h" + +namespace lsst::qserv::qmeta { +class JobStatus; +} + +// This header declarations +namespace lsst::qserv::qdisp { + +class Executive; +class QdispPool; +class ResponseHandler; +class QueryRequest; + +/// Base class for JobQuery and UberJob. +/// TODO:UJ This could use a lot of cleanup. Once UberJobs are fully in effect, there's no need +/// for this base class as it won't be possible to send a JobQuery to a worker without +/// putting it in an UberJob first. The UberJob is a wrapper that stores worker contact +/// info. +class JobBase : public std::enable_shared_from_this { +public: + using Ptr = std::shared_ptr; + + JobBase() = default; + JobBase(JobBase const&) = delete; + JobBase& operator=(JobBase const&) = delete; + virtual ~JobBase() = default; + + virtual QueryId getQueryId() const = 0; + virtual UberJobId getJobId() const = 0; + virtual std::string const& getIdStr() const = 0; + virtual std::shared_ptr getQdispPool() = 0; + virtual std::string const& getPayload() const = 0; ///< const& in return type is essential for xrootd + virtual std::shared_ptr getRespHandler() = 0; + virtual std::shared_ptr getStatus() = 0; + virtual bool getScanInteractive() const = 0; + virtual bool isQueryCancelled() = 0; + virtual void callMarkCompleteFunc(bool success) = 0; + virtual void setQueryRequest(std::shared_ptr const& qr) = 0; + virtual std::shared_ptr getExecutive() = 0; + + virtual std::ostream& dumpOS(std::ostream& os) const; + + std::string dump() const; + friend std::ostream& operator<<(std::ostream& os, JobBase const& jb); +}; + +} // namespace lsst::qserv::qdisp + +#endif // LSST_QSERV_QDISP_JOBBASE_H diff --git a/src/qdisp/JobDescription.cc b/src/qdisp/JobDescription.cc index 47abd2b4e9..50c05c39e6 100644 --- a/src/qdisp/JobDescription.cc +++ b/src/qdisp/JobDescription.cc @@ -35,6 +35,8 @@ // 
Qserv headers #include "proto/ProtoImporter.h" #include "proto/worker.pb.h" +#include "util/Bug.h" +#include "qdisp/Executive.h" #include "qdisp/ResponseHandler.h" #include "qproc/ChunkQuerySpec.h" #include "qproc/TaskMsgFactory.h" @@ -47,7 +49,7 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.JobDescription"); namespace lsst::qserv::qdisp { -JobDescription::JobDescription(qmeta::CzarId czarId, QueryId qId, int jobId, ResourceUnit const& resource, +JobDescription::JobDescription(qmeta::CzarId czarId, QueryId qId, JobId jobId, ResourceUnit const& resource, shared_ptr const& respHandler, shared_ptr const& taskMsgFactory, shared_ptr const& chunkQuerySpec, @@ -63,7 +65,7 @@ JobDescription::JobDescription(qmeta::CzarId czarId, QueryId qId, int jobId, Res _chunkResultName(chunkResultName), _mock(mock) {} -bool JobDescription::incrAttemptCountScrubResults() { +bool JobDescription::incrAttemptCountScrubResults() { // TODO:UJ delete if (_attemptCount >= 0) { _respHandler->prepScrubResults(_jobId, _attemptCount); // Registers the job-attempt as invalid } @@ -76,6 +78,39 @@ bool JobDescription::incrAttemptCountScrubResults() { return true; } +bool JobDescription::incrAttemptCountScrubResultsJson(std::shared_ptr const& exec, bool increase) { + if (increase) { + ++_attemptCount; + } + if (_attemptCount >= MAX_JOB_ATTEMPTS) { + LOGS(_log, LOG_LVL_ERROR, "attemptCount greater than maximum number of retries " << _attemptCount); + return false; + } + + if (exec != nullptr) { + int maxAttempts = exec->getMaxAttempts(); + LOGS(_log, LOG_LVL_INFO, "JoQDescription::" << __func__ << " attempts=" << _attemptCount); + if (_attemptCount > maxAttempts) { + LOGS(_log, LOG_LVL_ERROR, + "JoQDescription::" << __func__ << " attempts(" << _attemptCount << ") > maxAttempts(" + << maxAttempts << ") cancelling"); + exec->addMultiError(qmeta::JobStatus::RETRY_ERROR, + "max attempts reached " + to_string(_attemptCount) + " " + _qIdStr, + util::ErrorCode::INTERNAL); + exec->squash(); + return false; + } 
+ } + + // build the request + auto js = _taskMsgFactory->makeMsgJson(*_chunkQuerySpec, _chunkResultName, _queryId, _jobId, + _attemptCount, _czarId); + LOGS(_log, LOG_LVL_DEBUG, "JobDescription::" << __func__ << " js=" << (*js)); + _jsForWorker = js; + + return true; +} + void JobDescription::buildPayload() { ostringstream os; _taskMsgFactory->serializeMsg(*_chunkQuerySpec, _chunkResultName, _queryId, _jobId, _attemptCount, @@ -83,7 +118,7 @@ void JobDescription::buildPayload() { _payloads[_attemptCount] = os.str(); } -bool JobDescription::verifyPayload() const { +bool JobDescription::verifyPayload() const { // TODO:UJ delete proto::ProtoImporter pi; if (!_mock && !pi.messageAcceptable(_payloads.at(_attemptCount))) { LOGS(_log, LOG_LVL_DEBUG, _qIdStr << " Error serializing TaskMsg."); diff --git a/src/qdisp/JobDescription.h b/src/qdisp/JobDescription.h index f449ede77b..8c61f9d656 100644 --- a/src/qdisp/JobDescription.h +++ b/src/qdisp/JobDescription.h @@ -31,6 +31,9 @@ #include #include +// Third party headers +#include "nlohmann/json.hpp" + // Qserv headers #include "global/constants.h" #include "global/intTypes.h" @@ -41,15 +44,18 @@ namespace lsst::qserv { -namespace qproc { +namespace proto { +class TaskMsg; +} +namespace qproc { class ChunkQuerySpec; class TaskMsgFactory; - } // namespace qproc namespace qdisp { +class Executive; class ResponseHandler; /** Description of a job managed by the executive @@ -57,7 +63,7 @@ class ResponseHandler; class JobDescription { public: using Ptr = std::shared_ptr; - static JobDescription::Ptr create(qmeta::CzarId czarId, QueryId qId, int jobId, + static JobDescription::Ptr create(qmeta::CzarId czarId, QueryId qId, JobId jobId, ResourceUnit const& resource, std::shared_ptr const& respHandler, std::shared_ptr const& taskMsgFactory, @@ -72,7 +78,7 @@ class JobDescription { JobDescription& operator=(JobDescription const&) = delete; void buildPayload(); ///< Must be run after construction to avoid problems with unit tests. 
- int id() const { return _jobId; } + JobId id() const { return _jobId; } ResourceUnit const& resource() const { return _resource; } std::string const& payload() { return _payloads[_attemptCount]; } std::shared_ptr respHandler() { return _respHandler; } @@ -84,20 +90,28 @@ class JobDescription { /// @returns true when _attemptCount is incremented correctly and the payload is built. /// If the starting value of _attemptCount was greater than or equal to zero, that /// attempt is scrubbed from the result table. - bool incrAttemptCountScrubResults(); + bool incrAttemptCountScrubResults(); // TODO:UJ - to be deleted + /// Increase the attempt count by 1 and return false if that puts it over the limit. + /// TODO:UJ scrubbing results unneeded with uj. This should be renamed. + bool incrAttemptCountScrubResultsJson(std::shared_ptr const& exec, bool increase); bool verifyPayload() const; ///< @return true if the payload is acceptable to protobufs. + std::shared_ptr getJsForWorker() { return _jsForWorker; } + + void resetJsForWorker() { _jsForWorker.reset(); } // TODO:UJ may need mutex for _jsForWorker + friend std::ostream& operator<<(std::ostream& os, JobDescription const& jd); private: - JobDescription(qmeta::CzarId czarId, QueryId qId, int jobId, ResourceUnit const& resource, + JobDescription(qmeta::CzarId czarId, QueryId qId, JobId jobId, ResourceUnit const& resource, std::shared_ptr const& respHandler, std::shared_ptr const& taskMsgFactory, std::shared_ptr const& chunkQuerySpec, std::string const& chunkResultName, bool mock = false); + qmeta::CzarId _czarId; QueryId _queryId; - int _jobId; ///< Job's Id number. + JobId _jobId; ///< Job's Id number. std::string const _qIdStr; int _attemptCount{-1}; ///< Start at -1 so that first attempt will be 0, see incrAttemptCount(). ResourceUnit _resource; ///< path, e.g. /q/LSST/23125 @@ -114,6 +128,9 @@ class JobDescription { std::string _chunkResultName; bool _mock{false}; ///< True if this is a mock in a unit test. 
+ + /// The information the worker needs to run this job. Reset once sent. + std::shared_ptr _jsForWorker; }; std::ostream& operator<<(std::ostream& os, JobDescription const& jd); diff --git a/src/qdisp/JobQuery.cc b/src/qdisp/JobQuery.cc index 2e73f3649c..9b99f4d9da 100644 --- a/src/qdisp/JobQuery.cc +++ b/src/qdisp/JobQuery.cc @@ -40,46 +40,53 @@ namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.JobQuery"); } // anonymous namespace +using namespace std; + namespace lsst::qserv::qdisp { JobQuery::JobQuery(Executive::Ptr const& executive, JobDescription::Ptr const& jobDescription, - JobStatus::Ptr const& jobStatus, std::shared_ptr const& markCompleteFunc, - QueryId qid) - : _executive(executive), + qmeta::JobStatus::Ptr const& jobStatus, + shared_ptr const& markCompleteFunc, QueryId qid) + : JobBase(), + _executive(executive), _jobDescription(jobDescription), _markCompleteFunc(markCompleteFunc), _jobStatus(jobStatus), _qid(qid), - _idStr(QueryIdHelper::makeIdStr(qid, getIdInt())) { + _idStr(QueryIdHelper::makeIdStr(qid, getJobId())) { _qdispPool = executive->getQdispPool(); LOGS(_log, LOG_LVL_TRACE, "JobQuery desc=" << _jobDescription); } -JobQuery::~JobQuery() { LOGS(_log, LOG_LVL_DEBUG, "~JobQuery"); } +JobQuery::~JobQuery() { + LOGS(_log, LOG_LVL_DEBUG, "~JobQuery"); + LOGS(_log, LOG_LVL_WARN, "~JobQuery QID=" << _idStr); +} /** Attempt to run the job on a worker. * @return - false if it can not setup the job or the maximum number of attempts has been reached. 
*/ -bool JobQuery::runJob() { - QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getIdInt()); +bool JobQuery::runJob() { // TODO:UJ delete + QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getJobId()); LOGS(_log, LOG_LVL_DEBUG, " runJob " << *this); auto executive = _executive.lock(); if (executive == nullptr) { LOGS(_log, LOG_LVL_ERROR, "runJob failed executive==nullptr"); + return false; } bool superfluous = executive->isLimitRowComplete(); bool cancelled = executive->getCancelled(); bool handlerReset = _jobDescription->respHandler()->reset(); if (!(cancelled || superfluous) && handlerReset) { - auto criticalErr = [this, &executive](std::string const& msg) { + auto criticalErr = [this, &executive](string const& msg) { LOGS(_log, LOG_LVL_ERROR, msg << " " << _jobDescription << " Canceling user query!"); executive->squash(); // This should kill all jobs in this user query. }; LOGS(_log, LOG_LVL_DEBUG, "runJob checking attempt=" << _jobDescription->getAttemptCount()); - std::lock_guard lock(_rmutex); - if (_jobDescription->getAttemptCount() < _getMaxAttempts()) { + lock_guard lock(_rmutex); + if (_jobDescription->getAttemptCount() < executive->getMaxAttempts()) { bool okCount = _jobDescription->incrAttemptCountScrubResults(); if (!okCount) { criticalErr("hit structural max of retries"); @@ -101,10 +108,10 @@ bool JobQuery::runJob() { // whether or not we are in SSI as cancellation handling differs. // LOGS(_log, LOG_LVL_TRACE, "runJob calls StartQuery()"); - std::shared_ptr jq(shared_from_this()); + JobQuery::Ptr jq(dynamic_pointer_cast(shared_from_this())); _inSsi = true; if (executive->startQuery(jq)) { - _jobStatus->updateInfo(_idStr, JobStatus::REQUEST, "EXEC"); + _jobStatus->updateInfo(_idStr, qmeta::JobStatus::REQUEST, "EXEC"); return true; } _inSsi = false; @@ -116,10 +123,10 @@ bool JobQuery::runJob() { /// Cancel response handling. Return true if this is the first time cancel has been called. 
bool JobQuery::cancel(bool superfluous) { - QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getIdInt()); + QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getJobId()); LOGS(_log, LOG_LVL_DEBUG, "JobQuery::cancel()"); if (_cancelled.exchange(true) == false) { - std::lock_guard lock(_rmutex); + lock_guard lock(_rmutex); // If _inSsi is true then this query request has been passed to SSI and // _queryRequestPtr cannot be a nullptr. Cancellation is complicated. bool cancelled = false; @@ -133,7 +140,7 @@ bool JobQuery::cancel(bool superfluous) { } } if (!cancelled) { - std::ostringstream os; + ostringstream os; os << _idStr << " cancel QueryRequest=" << _queryRequestPtr; LOGS(_log, LOG_LVL_DEBUG, os.str()); if (!superfluous) { @@ -144,7 +151,7 @@ bool JobQuery::cancel(bool superfluous) { LOGS(_log, LOG_LVL_ERROR, " can't markComplete cancelled, executive == nullptr"); return false; } - executive->markCompleted(getIdInt(), false); + executive->markCompleted(getJobId(), false); } if (!superfluous) { _jobDescription->respHandler()->processCancel(); @@ -160,7 +167,7 @@ bool JobQuery::cancel(bool superfluous) { /// cancelling all the jobs that it makes a difference. If either the executive, /// or the job has cancelled, proceeding is probably not a good idea. 
bool JobQuery::isQueryCancelled() { - QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getIdInt()); + QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getJobId()); auto exec = _executive.lock(); if (exec == nullptr) { LOGS(_log, LOG_LVL_WARN, "_executive == nullptr"); @@ -169,8 +176,48 @@ bool JobQuery::isQueryCancelled() { return exec->getCancelled(); } -std::ostream& operator<<(std::ostream& os, JobQuery const& jq) { - return os << "{" << jq.getIdStr() << jq._jobDescription << " " << *jq._jobStatus << "}"; +bool JobQuery::_setUberJobId(UberJobId ujId) { + QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getJobId()); + if (_uberJobId >= 0 && ujId != _uberJobId) { + LOGS(_log, LOG_LVL_DEBUG, + __func__ << " couldn't change UberJobId as ujId=" << ujId << " is owned by " << _uberJobId); + return false; + } + _uberJobId = ujId; + return true; +} + +bool JobQuery::unassignFromUberJob(UberJobId ujId) { + QSERV_LOGCONTEXT_QUERY_JOB(getQueryId(), getJobId()); + std::lock_guard lock(_rmutex); + if (_uberJobId < 0) { + LOGS(_log, LOG_LVL_INFO, __func__ << " UberJobId already unassigned. attempt by ujId=" << ujId); + return true; + } + if (_uberJobId != ujId) { + LOGS(_log, LOG_LVL_ERROR, + __func__ << " couldn't change UberJobId as ujId=" << ujId << " is owned by " << _uberJobId); + return false; + } + _uberJobId = -1; + + auto exec = _executive.lock(); + // Do not increase the count as it should have been increased when the job was started. 
+ _jobDescription->incrAttemptCountScrubResultsJson(exec, false); + return true; +} + +int JobQuery::getAttemptCount() const { + std::lock_guard lock(_rmutex); + return _jobDescription->getAttemptCount(); +} + +string const& JobQuery::getPayload() const { return _jobDescription->payload(); } + +void JobQuery::callMarkCompleteFunc(bool success) { _markCompleteFunc->operator()(success); } + +ostream& JobQuery::dumpOS(ostream& os) const { + return os << "{" << getIdStr() << _jobDescription << " " << _jobStatus << "}"; } } // namespace lsst::qserv::qdisp diff --git a/src/qdisp/JobQuery.h b/src/qdisp/JobQuery.h index f23544c212..a11b628d49 100644 --- a/src/qdisp/JobQuery.h +++ b/src/qdisp/JobQuery.h @@ -33,6 +33,7 @@ // Qserv headers #include "qdisp/Executive.h" +#include "qdisp/JobBase.h" #include "qdisp/JobDescription.h" #include "qdisp/ResponseHandler.h" #include "util/InstanceCount.h" @@ -42,31 +43,37 @@ namespace lsst::qserv::qdisp { class QdispPool; class QueryRequest; -/** This class is used to describe, monitor, and control a single query to a worker. - * - */ -class JobQuery : public std::enable_shared_from_this { +/// This class is used to describe, monitor, and control a single query to a worker. +/// TODO:UJ once all Jobs are sent out as UberJobs, the purpose of this class is a bit +/// vague. It's components should probably be split between UberJob and +/// JobDescription. +class JobQuery : public JobBase { public: typedef std::shared_ptr Ptr; /// Factory function to make certain a shared_ptr is used and _setup is called. 
static JobQuery::Ptr create(Executive::Ptr const& executive, JobDescription::Ptr const& jobDescription, - JobStatus::Ptr const& jobStatus, + qmeta::JobStatus::Ptr const& jobStatus, std::shared_ptr const& markCompleteFunc, QueryId qid) { - Ptr jq = std::make_shared(executive, jobDescription, jobStatus, markCompleteFunc, qid); + Ptr jq = Ptr(new JobQuery(executive, jobDescription, jobStatus, markCompleteFunc, qid)); jq->_setup(); return jq; } virtual ~JobQuery(); - virtual bool runJob(); - QueryId getQueryId() const { return _qid; } - int getIdInt() const { return _jobDescription->id(); } - std::string const& getIdStr() const { return _idStr; } + /// Run this job. + bool runJob(); + + QueryId getQueryId() const override { return _qid; } + JobId getJobId() const override { return _jobDescription->id(); } + std::string const& getPayload() const override; + std::string const& getIdStr() const override { return _idStr; } + std::shared_ptr getRespHandler() override { return _jobDescription->respHandler(); } + bool getScanInteractive() const override { return _jobDescription->getScanInteractive(); } JobDescription::Ptr getDescription() { return _jobDescription; } - std::shared_ptr getRespHandler() { return _jobDescription->respHandler(); } - JobStatus::Ptr getStatus() { return _jobStatus; } + + qmeta::JobStatus::Ptr getStatus() override { return _jobStatus; } void setQueryRequest(std::shared_ptr const& qr) { std::lock_guard lock(_rmutex); @@ -77,34 +84,61 @@ class JobQuery : public std::enable_shared_from_this { return _queryRequestPtr; } - std::shared_ptr getMarkCompleteFunc() { return _markCompleteFunc; } + void callMarkCompleteFunc(bool success) override; bool cancel(bool superfluous = false); - bool isQueryCancelled(); + bool isQueryCancelled() override; - Executive::Ptr getExecutive() { return _executive.lock(); } + std::shared_ptr getExecutive() override { return _executive.lock(); } - std::shared_ptr getQdispPool() { return _qdispPool; } + std::shared_ptr 
getQdispPool() override { return _qdispPool; } - friend std::ostream& operator<<(std::ostream& os, JobQuery const& jq); + std::ostream& dumpOS(std::ostream& os) const override; /// Make a copy of the job description. JobQuery::_setup() must be called after creation. /// Do not call this directly, use create. JobQuery(Executive::Ptr const& executive, JobDescription::Ptr const& jobDescription, - JobStatus::Ptr const& jobStatus, std::shared_ptr const& markCompleteFunc, - QueryId qid); + qmeta::JobStatus::Ptr const& jobStatus, + std::shared_ptr const& markCompleteFunc, QueryId qid); - bool isCancelled() { return _cancelled; } + /// If the UberJob is unassigned, change the _uberJobId to ujId. + bool setUberJobId(UberJobId ujId) { + std::lock_guard lock(_rmutex); + return _setUberJobId(ujId); + } -protected: - void _setup() { _jobDescription->respHandler()->setJobQuery(shared_from_this()); } + UberJobId getUberJobId() const { + std::lock_guard lock(_rmutex); + return _getUberJobId(); + } - int _getRunAttemptsCount() const { + bool isInUberJob() const { std::lock_guard lock(_rmutex); - return _jobDescription->getAttemptCount(); + return _isInUberJob(); } - int _getMaxAttempts() const { return 5; } // Arbitrary value until solid value with reason determined. - int _getAttemptSleepSeconds() const { return 30; } // As above or until added to config file. + + int getAttemptCount() const; + + /// If ujId is the current owner, clear ownership. + /// @return true if job is unassigned. + bool unassignFromUberJob(UberJobId ujId); + +protected: + void _setup() { + JobBase::Ptr jbPtr = shared_from_this(); + _jobDescription->respHandler()->setJobQuery(jbPtr); + } + + /// @return true if _uberJobId was set, it can only be set if it is unassigned + /// or by the current owner. 
+ /// NOTE: _rmutex must be held before calling this + bool _setUberJobId(UberJobId ujId); + + /// NOTE: _rmutex must be held before calling this + UberJobId _getUberJobId() const { return _uberJobId; } + + /// NOTE: _rmutex must be held before calling this + bool _isInUberJob() const { return _uberJobId >= 0; } // Values that don't change once set. std::weak_ptr _executive; @@ -113,14 +147,16 @@ class JobQuery : public std::enable_shared_from_this { std::shared_ptr _markCompleteFunc; // JobStatus has its own mutex. - JobStatus::Ptr _jobStatus; ///< Points at status in Executive::_statusMap + qmeta::JobStatus::Ptr _jobStatus; ///< Points at status in Executive::_statusMap QueryId const _qid; // User query id std::string const _idStr; ///< Identifier string for logging. // Values that need mutex protection + // TODO:UJ recursive can probably go away with as well as _inSsi. mutable std::recursive_mutex _rmutex; ///< protects _jobDescription, - ///< _queryRequestPtr, and _inSsi + ///< _queryRequestPtr, _uberJobId, + ///< and _inSsi // SSI items std::shared_ptr _queryRequestPtr; @@ -130,6 +166,13 @@ class JobQuery : public std::enable_shared_from_this { std::atomic _cancelled{false}; ///< Lock to make sure cancel() is only called once. std::shared_ptr _qdispPool; + + /// The UberJobId that this job is assigned to. Values less than zero + /// indicate this job is unassigned. To prevent race conditions, + /// an UberJob may only unassign a job if it has the same ID as + /// _uberJobId. + /// All jobs must be unassigned before they can be reassigned. 
+ UberJobId _uberJobId = -1; }; } // namespace lsst::qserv::qdisp diff --git a/src/qdisp/QueryRequest.cc b/src/qdisp/QueryRequest.cc index a8d4e8f2ee..2821f7fbae 100644 --- a/src/qdisp/QueryRequest.cc +++ b/src/qdisp/QueryRequest.cc @@ -43,9 +43,10 @@ // Qserv headers #include "czar/Czar.h" #include "qdisp/CzarStats.h" +#include "qdisp/UberJob.h" #include "global/LogContext.h" #include "proto/worker.pb.h" -#include "qdisp/JobStatus.h" +#include "qmeta/JobStatus.h" #include "qdisp/ResponseHandler.h" #include "util/Bug.h" #include "util/common.h" @@ -60,12 +61,12 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.QueryRequest"); namespace lsst::qserv::qdisp { -QueryRequest::QueryRequest(JobQuery::Ptr const& jobQuery) - : _jobQuery(jobQuery), - _qid(jobQuery->getQueryId()), - _jobid(jobQuery->getIdInt()), - _jobIdStr(jobQuery->getIdStr()), - _qdispPool(_jobQuery->getQdispPool()) { +QueryRequest::QueryRequest(JobBase::Ptr const& job) + : _job(job), + _qid(job->getQueryId()), + _jobid(job->getJobId()), + _jobIdStr(job->getIdStr()), + _qdispPool(_job->getQdispPool()) { QSERV_LOGCONTEXT_QUERY_JOB(_qid, _jobid); LOGS(_log, LOG_LVL_TRACE, "New QueryRequest"); } @@ -86,16 +87,16 @@ QueryRequest::~QueryRequest() { char* QueryRequest::GetRequest(int& requestLength) { QSERV_LOGCONTEXT_QUERY_JOB(_qid, _jobid); lock_guard lock(_finishStatusMutex); - auto jq = _jobQuery; + auto jq = _job; if (_finishStatus != ACTIVE || jq == nullptr) { LOGS(_log, LOG_LVL_DEBUG, __func__ << " called after job finished (cancelled?)"); requestLength = 0; return const_cast(""); } - requestLength = jq->getDescription()->payload().size(); + requestLength = jq->getPayload().size(); LOGS(_log, LOG_LVL_DEBUG, "Requesting, payload size: " << requestLength); // Andy promises that his code won't corrupt it. - return const_cast(jq->getDescription()->payload().data()); + return const_cast(jq->getPayload().data()); } // Must not throw exceptions: calling thread cannot trap them. 
@@ -112,7 +113,7 @@ bool QueryRequest::ProcessResponse(XrdSsiErrInfo const& eInfo, XrdSsiRespInfo co } // Make a copy of the _jobQuery shared_ptr in case _jobQuery gets reset by a call to cancel() - auto jq = _jobQuery; + auto jq = _job; { lock_guard lock(_finishStatusMutex); if ((_finishStatus != ACTIVE) || (jq == nullptr)) { @@ -124,8 +125,8 @@ bool QueryRequest::ProcessResponse(XrdSsiErrInfo const& eInfo, XrdSsiRespInfo co ostringstream os; os << _jobIdStr << __func__ << " request failed " << getSsiErr(eInfo, nullptr) << " " << GetEndPoint(); - jq->getDescription()->respHandler()->errorFlush(os.str(), -1); - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::RESPONSE_ERROR, "SSI"); + jq->getRespHandler()->errorFlush(os.str(), -1); + jq->getStatus()->updateInfo(_jobIdStr, qmeta::JobStatus::RESPONSE_ERROR, "SSI"); _errorFinish(); return true; } @@ -137,18 +138,18 @@ bool QueryRequest::ProcessResponse(XrdSsiErrInfo const& eInfo, XrdSsiRespInfo co break; case XrdSsiRespInfo::isData: if (string(rInfo.buff, rInfo.blen) == "MockResponse") { - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::COMPLETE, "MOCK"); + jq->getStatus()->updateInfo(_jobIdStr, qmeta::JobStatus::COMPLETE, "MOCK"); _finish(); return true; } else if (rInfo.blen == 0) { // Metadata-only responses for the file-based protocol should not have any data - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::RESPONSE_READY, "SSI"); + jq->getStatus()->updateInfo(_jobIdStr, qmeta::JobStatus::RESPONSE_READY, "SSI"); return _importResultFile(jq); } responseTypeName = "isData"; break; case XrdSsiRespInfo::isError: - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::RESPONSE_ERROR, "SSI", rInfo.eNum, + jq->getStatus()->updateInfo(_jobIdStr, qmeta::JobStatus::RESPONSE_ERROR, "SSI", rInfo.eNum, string(rInfo.eMsg)); return _importError(string(rInfo.eMsg), rInfo.eNum); case XrdSsiRespInfo::isFile: @@ -165,13 +166,17 @@ bool QueryRequest::ProcessResponse(XrdSsiErrInfo const& eInfo, XrdSsiRespInfo co /// Retrieve and 
process a result file using the file-based protocol /// Uses a copy of JobQuery::Ptr instead of _jobQuery as a call to cancel() would reset _jobQuery. -bool QueryRequest::_importResultFile(JobQuery::Ptr const& jq) { +bool QueryRequest::_importResultFile(JobBase::Ptr const& job) { // It's possible jq and _jobQuery differ, so need to use jq. - if (jq->isQueryCancelled()) { + if (job->isQueryCancelled()) { LOGS(_log, LOG_LVL_WARN, "QueryRequest::_processData job was cancelled."); _errorFinish(true); return false; } + auto jq = std::dynamic_pointer_cast(job); + if (jq == nullptr) { + throw util::Bug(ERR_LOC, string(__func__) + " unexpected pointer type for job"); + } auto executive = jq->getExecutive(); if (executive == nullptr || executive->getCancelled() || executive->isLimitRowComplete()) { if (executive == nullptr || executive->getCancelled()) { @@ -209,7 +214,7 @@ bool QueryRequest::_importResultFile(JobQuery::Ptr const& jq) { // At this point all data for this job have been read, there's no point in // having XrdSsi wait for anything. - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::COMPLETE, "COMPLETE"); + jq->getStatus()->updateInfo(_jobIdStr, qmeta::JobStatus::COMPLETE, "COMPLETE"); _finish(); // If the query meets the limit row complete complete criteria, it will start @@ -222,7 +227,7 @@ bool QueryRequest::_importResultFile(JobQuery::Ptr const& jq) { /// Process an incoming error. 
bool QueryRequest::_importError(string const& msg, int code) { - auto jq = _jobQuery; + auto jq = _job; { lock_guard lock(_finishStatusMutex); if (_finishStatus != ACTIVE || jq == nullptr) { @@ -230,7 +235,7 @@ bool QueryRequest::_importError(string const& msg, int code) { "QueryRequest::_importError code=" << code << " msg=" << msg << " not passed"); return false; } - jq->getDescription()->respHandler()->errorFlush(msg, code); + jq->getRespHandler()->errorFlush(msg, code); } _errorFinish(); return true; @@ -242,10 +247,10 @@ void QueryRequest::ProcessResponseData(XrdSsiErrInfo const& eInfo, char* buff, i throw util::Bug(ERR_LOC, err); } -void QueryRequest::_flushError(JobQuery::Ptr const& jq) { - ResponseHandler::Error err = jq->getDescription()->respHandler()->getError(); - jq->getStatus()->updateInfo(_jobIdStr, JobStatus::MERGE_ERROR, "MERGE", err.getCode(), err.getMsg(), - MSG_ERROR); +void QueryRequest::_flushError(JobBase::Ptr const& jq) { + ResponseHandler::Error err = jq->getRespHandler()->getError(); + jq->getStatus()->updateInfo(_jobIdStr, qmeta::JobStatus::MERGE_ERROR, "MERGE", err.getCode(), + err.getMsg(), MSG_ERROR); _errorFinish(true); } @@ -262,8 +267,8 @@ bool QueryRequest::cancel() { _retried = true; // Prevent retries. // Only call the following if the job is NOT already done. if (_finishStatus == ACTIVE) { - auto jq = _jobQuery; - if (jq != nullptr) jq->getStatus()->updateInfo(_jobIdStr, JobStatus::CANCEL, "CANCEL"); + auto jq = _job; + if (jq != nullptr) jq->getStatus()->updateInfo(_jobIdStr, qmeta::JobStatus::CANCEL, "CANCEL"); } } return _errorFinish(true); // return true if errorFinish cancelled @@ -272,7 +277,7 @@ bool QueryRequest::cancel() { /// @return true if this object's JobQuery, or its Executive has been cancelled. 
/// It takes time for the Executive to flag all jobs as being cancelled bool QueryRequest::isQueryCancelled() { - auto jq = _jobQuery; + auto jq = _job; if (jq == nullptr) { // Need to check if _jobQuery is null due to cancellation. return isQueryRequestCancelled(); @@ -303,7 +308,7 @@ void QueryRequest::cleanup() { // _finishStatusMutex before it is unlocked. // This should reset _jobquery and _keepAlive without risk of either being deleted // before being reset. - shared_ptr jq(move(_jobQuery)); + shared_ptr jq(move(_job)); shared_ptr keep(move(_keepAlive)); } @@ -312,9 +317,23 @@ void QueryRequest::cleanup() { /// a local shared pointer for this QueryRequest and/or its owner JobQuery. /// See QueryRequest::cleanup() /// @return true if this QueryRequest object had the authority to make changes. -bool QueryRequest::_errorFinish(bool stopTrying) { - LOGS(_log, LOG_LVL_DEBUG, "_errorFinish() shouldCancel=" << stopTrying); - auto jq = _jobQuery; +// TODO:UJ Delete QueryRequest class, including this function. +bool QueryRequest::_errorFinish(bool shouldCancel) { + LOGS(_log, LOG_LVL_DEBUG, "_errorFinish() shouldCancel=" << shouldCancel); + + auto jbase = _job; + JobQuery::Ptr jq = dynamic_pointer_cast(jbase); + if (jq == nullptr) { + // TODO:UJ The QueryRequest class will be deleted, so this doen't matter. + UberJob::Ptr uberJob = dynamic_pointer_cast(jbase); + if (uberJob != nullptr) { + throw util::Bug(ERR_LOC, " for _errorFinish to work correctly with UberJob"); + // UberJobs breakup into their JobQueries when they fail and run the jobs directly. + } + return false; + } + + // Normal JobQuery error handling. { // Running _errorFinish more than once could cause errors. lock_guard lock(_finishStatusMutex); @@ -328,20 +347,20 @@ bool QueryRequest::_errorFinish(bool stopTrying) { } // Make the calls outside of the mutex lock. 
- LOGS(_log, LOG_LVL_DEBUG, "calling Finished(stopTrying=" << stopTrying << ")"); - bool ok = Finished(); + LOGS(_log, LOG_LVL_DEBUG, "calling Finished(shouldCancel=" << shouldCancel << ")"); + bool ok = Finished(shouldCancel); _finishedCalled = true; if (!ok) { - LOGS(_log, LOG_LVL_ERROR, "QueryRequest::_errorFinish NOT ok"); + LOGS(_log, LOG_LVL_ERROR, "QueryRequest::_errorFinish !ok "); } else { LOGS(_log, LOG_LVL_DEBUG, "QueryRequest::_errorFinish ok"); } - if (!_retried.exchange(true) && !stopTrying) { + if (!_retried.exchange(true) && !shouldCancel) { // There's a slight race condition here. _jobQuery::runJob() creates a // new QueryRequest object which will replace this one in _jobQuery. // The replacement could show up before this one's cleanup() is called, - // so this will keep this alive. + // so this will keep this alive until cleanup() is done. LOGS(_log, LOG_LVL_DEBUG, "QueryRequest::_errorFinish retrying"); _keepAlive = jq->getQueryRequest(); // shared pointer to this if (!jq->runJob()) { @@ -384,12 +403,12 @@ void QueryRequest::_finish() { cleanup(); } -/// Inform the Executive that this query completed, and -// Call MarkCompleteFunc only once, it should only be called from _finish() or _errorFinish. void QueryRequest::_callMarkComplete(bool success) { if (!_calledMarkComplete.exchange(true)) { - auto jq = _jobQuery; - if (jq != nullptr) jq->getMarkCompleteFunc()->operator()(success); + auto jq = _job; + if (jq != nullptr) { + jq->callMarkCompleteFunc(success); + } } } diff --git a/src/qdisp/QueryRequest.h b/src/qdisp/QueryRequest.h index 93eedca0f1..1327b4673e 100644 --- a/src/qdisp/QueryRequest.h +++ b/src/qdisp/QueryRequest.h @@ -79,12 +79,13 @@ class RequestError : public std::exception { /// cancellation function with its client that maintains a pointer to the /// QueryRequest. After Finished(), the cancellation function must be prevented /// from accessing the QueryRequest instance. 
+// TODO:UJ delete this class class QueryRequest : public XrdSsiRequest, public std::enable_shared_from_this { public: typedef std::shared_ptr Ptr; - static Ptr create(std::shared_ptr const& jobQuery) { - Ptr newQueryRequest(new QueryRequest(jobQuery)); + static Ptr create(std::shared_ptr const& jobBase) { + Ptr newQueryRequest(new QueryRequest(jobBase)); return newQueryRequest; } @@ -117,19 +118,21 @@ class QueryRequest : public XrdSsiRequest, public std::enable_shared_from_this const& jobQuery); + QueryRequest(JobBase::Ptr const& job); + /// Inform the Executive that this query completed, and call MarkCompleteFunc only once. + /// This should only be called from _finish() or _errorFinish. void _callMarkComplete(bool success); - bool _importResultFile(JobQuery::Ptr const& jq); + bool _importResultFile(JobBase::Ptr const& jq); bool _importError(std::string const& msg, int code); bool _errorFinish(bool stopTrying = false); void _finish(); - void _flushError(JobQuery::Ptr const& jq); + void _flushError(JobBase::Ptr const& jq); - /// Job information. Not using a weak_ptr as Executive could drop its JobQuery::Ptr before we're done with - /// it. A call to cancel() could reset _jobQuery early, so copy or protect _jobQuery with - /// _finishStatusMutex as needed. If (_finishStatus == ACTIVE) _jobQuery should be good. - std::shared_ptr _jobQuery; + /// Job information. Not using a weak_ptr as Executive could drop its JobBase::Ptr before we're done with + /// it. A call to cancel() could reset _job early, so copy or protect _job with _finishStatusMutex as + /// needed. If (_finishStatus == ACTIVE) _job should be good. + std::shared_ptr _job; std::atomic _retried{false}; ///< Protect against multiple retries of _jobQuery from a /// single QueryRequest. @@ -142,7 +145,7 @@ class QueryRequest : public XrdSsiRequest, public std::enable_shared_from_this _keepAlive; ///< Used to keep this object alive during race condition. 
QueryId _qid = 0; // for logging - int _jobid = -1; // for logging + JobId _jobid = -1; // for logging std::string _jobIdStr{QueryIdHelper::makeIdStr(0, 0, true)}; ///< for debugging only. std::atomic _finishedCalled{false}; diff --git a/src/qdisp/ResponseHandler.h b/src/qdisp/ResponseHandler.h index 95a82f8357..ec157d8a76 100644 --- a/src/qdisp/ResponseHandler.h +++ b/src/qdisp/ResponseHandler.h @@ -42,7 +42,7 @@ class ResponseSummary; namespace lsst::qserv::qdisp { -class JobQuery; +class JobBase; /// ResponseHandler is an interface that handles result bytes. Tasks are /// submitted to an Executive instance naming a resource unit (what resource is @@ -57,7 +57,7 @@ class ResponseHandler { typedef std::shared_ptr Ptr; ResponseHandler() {} - void setJobQuery(std::shared_ptr const& jobQuery) { _jobQuery = jobQuery; } + void setJobQuery(std::shared_ptr const& jobBase) { _jobBase = jobBase; } virtual ~ResponseHandler() {} /// Process a request for pulling and merging a job result into the result table @@ -66,6 +66,17 @@ class ResponseHandler { /// @return true if successful (no error) virtual bool flush(proto::ResponseSummary const& responseSummary, uint32_t& resultRows) = 0; + /// Collect result data from the worker and merge it with the query result table. + /// @return success - true if the operation was successful + /// @return shouldCancel - if success was false, this being true indicates there + /// was an unrecoverable error in table writing and the query + /// should be cancelled. + virtual std::tuple flushHttp(std::string const& fileUrl, uint64_t expectedRows, + uint64_t& resultRows) = 0; + + /// Add the error to the error output if it is the first error. + virtual void flushHttpError(int errorCode, std::string const& errorMsg, int status) = 0; + /// Signal an unrecoverable error condition. No further calls are expected. 
virtual void errorFlush(std::string const& msg, int code) = 0; @@ -85,10 +96,10 @@ class ResponseHandler { /// Scrub the results from jobId-attempt from the result table. virtual void prepScrubResults(int jobId, int attempt) = 0; - std::weak_ptr getJobQuery() { return _jobQuery; } + std::weak_ptr getJobBase() { return _jobBase; } private: - std::weak_ptr _jobQuery; + std::weak_ptr _jobBase; }; inline std::ostream& operator<<(std::ostream& os, ResponseHandler const& r) { return r.print(os); } diff --git a/src/qdisp/UberJob.cc b/src/qdisp/UberJob.cc new file mode 100644 index 0000000000..8b092a6d3b --- /dev/null +++ b/src/qdisp/UberJob.cc @@ -0,0 +1,473 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "qdisp/UberJob.h" + +// System headers +#include + +// Third-party headers +#include +#include "nlohmann/json.hpp" + +// Qserv headers +#include "cconfig/CzarConfig.h" +#include "global/LogContext.h" +#include "http/Client.h" +#include "http/MetaModule.h" +#include "proto/ProtoImporter.h" +#include "proto/worker.pb.h" +#include "qdisp/JobQuery.h" +#include "qmeta/JobStatus.h" +#include "util/Bug.h" +#include "util/common.h" + +// LSST headers +#include "lsst/log/Log.h" + +using namespace std; +using namespace nlohmann; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.UberJob"); +} + +namespace lsst { namespace qserv { namespace qdisp { + +UberJob::Ptr UberJob::create(Executive::Ptr const& executive, + std::shared_ptr const& respHandler, int queryId, int uberJobId, + qmeta::CzarId czarId, + czar::CzarChunkMap::WorkerChunksData::Ptr const& workerData) { + UberJob::Ptr uJob(new UberJob(executive, respHandler, queryId, uberJobId, czarId, workerData)); + uJob->_setup(); + return uJob; +} + +UberJob::UberJob(Executive::Ptr const& executive, std::shared_ptr const& respHandler, + int queryId, int uberJobId, qmeta::CzarId czarId, + czar::CzarChunkMap::WorkerChunksData::Ptr const& workerData) + : JobBase(), + _executive(executive), + _respHandler(respHandler), + _queryId(queryId), + _uberJobId(uberJobId), + _czarId(czarId), + _idStr("QID=" + to_string(_queryId) + ":uj=" + to_string(uberJobId)), + _qdispPool(executive->getQdispPool()), + _workerData(workerData) {} + +void UberJob::_setup() { + JobBase::Ptr jbPtr = shared_from_this(); + _respHandler->setJobQuery(jbPtr); +} + +bool UberJob::addJob(JobQuery::Ptr const& job) { + bool success = false; + if (job->setUberJobId(getJobId())) { + lock_guard lck(_jobsMtx); + _jobs.push_back(job); + success = true; + } + if (!success) { + // TODO:UJ not really the right thing to do, but high visibility wanted for now. 
+ throw util::Bug(ERR_LOC, string("job already in UberJob job=") + job->dump() + " uberJob=" + dump()); + } + return success; +} + +bool UberJob::runUberJob() { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " start"); + // Build the uberjob payload for each job. + nlohmann::json uj; + unique_lock jobsLock(_jobsMtx); + auto exec = _executive.lock(); + for (auto const& jqPtr : _jobs) { + jqPtr->getDescription()->incrAttemptCountScrubResultsJson(exec, true); + } + + // Send the uberjob to the worker + auto const method = http::Method::POST; + string const url = "http://" + _wContactInfo->wHost + ":" + to_string(_wContactInfo->wPort) + "/queryjob"; + vector const headers = {"Content-Type: application/json"}; + auto const& czarConfig = cconfig::CzarConfig::instance(); + // See xrdsvc::httpWorkerCzarModule::_handleQueryJob for json message parsing. + json request = {{"version", http::MetaModule::version}, + {"instance_id", czarConfig->replicationInstanceId()}, + {"auth_key", czarConfig->replicationAuthKey()}, + {"worker", _wContactInfo->wId}, + {"czar", + {{"name", czarConfig->name()}, + {"id", czarConfig->id()}, + {"management-port", czarConfig->replicationHttpPort()}, + {"management-host-name", util::get_current_host_fqdn()}}}, + {"uberjob", + {{"queryid", _queryId}, + {"uberjobid", _uberJobId}, + {"czarid", _czarId}, + {"jobs", json::array()}}}}; + + auto& jsUberJob = request["uberjob"]; + auto& jsJobs = jsUberJob["jobs"]; + for (auto const& jbPtr : _jobs) { + auto const description = jbPtr->getDescription(); + if (description == nullptr) { + throw util::Bug(ERR_LOC, cName(__func__) + " description=null for job=" + jbPtr->getIdStr()); + } + auto const jsForWorker = jbPtr->getDescription()->getJsForWorker(); + if (jsForWorker == nullptr) { + throw util::Bug(ERR_LOC, cName(__func__) + " jsForWorker=null for job=" + jbPtr->getIdStr()); + } + json jsJob = {{"jobdesc", *jsForWorker}}; + jsJobs.push_back(jsJob); + jbPtr->getDescription()->resetJsForWorker(); // no longer 
needed. + } + jobsLock.unlock(); // unlock so other _jobsMtx threads can advance while this waits for transmit + + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " REQ " << request); + string const requestContext = "Czar: '" + http::method2string(method) + "' request to '" + url + "'"; + LOGS(_log, LOG_LVL_TRACE, + cName(__func__) << " czarPost url=" << url << " request=" << request.dump() + << " headers=" << headers[0]); + http::Client client(method, url, request.dump(), headers); + bool transmitSuccess = false; + string exceptionWhat; + try { + json const response = client.readAsJson(); + if (0 != response.at("success").get()) { + transmitSuccess = true; + } else { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " response success=0"); + } + } catch (exception const& ex) { + LOGS(_log, LOG_LVL_WARN, requestContext + " failed, ex: " + ex.what()); + exceptionWhat = ex.what(); + } + if (!transmitSuccess) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " transmit failure, try to send jobs elsewhere"); + _unassignJobs(); // locks _jobsMtx + setStatusIfOk(qmeta::JobStatus::RESPONSE_ERROR, + cName(__func__) + " not transmitSuccess " + exceptionWhat); + + } else { + setStatusIfOk(qmeta::JobStatus::REQUEST, cName(__func__) + " transmitSuccess"); // locks _jobsMtx + } + return false; +} + +void UberJob::prepScrubResults() { + // TODO:UJ There's a good chance this will not be needed as incomplete files (partitions) + // will not be merged so you don't have to worry about removing rows from incomplete + // jobs or uberjobs from the result table. 
+ throw util::Bug(ERR_LOC, + "TODO:UJ If needed, should call prepScrubResults for all JobQueries in the UberJob "); +} + +void UberJob::_unassignJobs() { + lock_guard lck(_jobsMtx); + auto exec = _executive.lock(); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " exec is null"); + return; + } + for (auto&& job : _jobs) { + string jid = job->getIdStr(); + if (!job->unassignFromUberJob(getJobId())) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " could not unassign job=" << jid << " cancelling"); + exec->addMultiError(qmeta::JobStatus::RETRY_ERROR, "unable to re-assign " + jid, + util::ErrorCode::INTERNAL); + exec->squash(); + return; + } + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) << " job=" << jid << " attempts=" << job->getAttemptCount()); + } + _jobs.clear(); + bool const setFlag = true; + exec->setFlagFailedUberJob(setFlag); +} + +bool UberJob::isQueryCancelled() { + auto exec = _executive.lock(); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " _executive == nullptr"); + return true; // Safer to assume the worst. + } + return exec->getCancelled(); +} + +bool UberJob::_setStatusIfOk(qmeta::JobStatus::State newState, string const& msg) { + // must be locked _jobsMtx + auto currentState = _jobStatus->getState(); + // Setting the same state twice indicates that the system is trying to do something it + // has already done, so doing it a second time would be an error. + if (newState <= currentState) { + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " could not change from state=" << _jobStatus->stateStr(currentState) + << " to " << _jobStatus->stateStr(newState)); + return false; + } + + // Overwriting errors is probably not a good idea. 
+ if (currentState >= qmeta::JobStatus::CANCEL && currentState < qmeta::JobStatus::COMPLETE) { + LOGS(_log, LOG_LVL_WARN, + cName(__func__) << " already error current=" << _jobStatus->stateStr(currentState) + << " new=" << _jobStatus->stateStr(newState)); + return false; + } + + _jobStatus->updateInfo(getIdStr(), newState, msg); + for (auto&& jq : _jobs) { + jq->getStatus()->updateInfo(jq->getIdStr(), newState, msg); + } + return true; +} + +void UberJob::callMarkCompleteFunc(bool success) { + LOGS(_log, LOG_LVL_DEBUG, "UberJob::callMarkCompleteFunc success=" << success); + + lock_guard lck(_jobsMtx); + // Need to set this uberJob's status, however exec->markCompleted will set + // the status for each job when it is called. + string source = string("UberJob_") + (success ? "SUCCESS" : "FAILED"); + _jobStatus->updateInfo(getIdStr(), qmeta::JobStatus::COMPLETE, source); + for (auto&& job : _jobs) { + string idStr = job->getIdStr(); + if (success) { + job->getStatus()->updateInfo(idStr, qmeta::JobStatus::COMPLETE, source); + } else { + job->getStatus()->updateInfoNoErrorOverwrite(idStr, qmeta::JobStatus::RESULT_ERROR, source, + util::ErrorCode::INTERNAL, "UberJob_failure"); + } + auto exec = _executive.lock(); + exec->markCompleted(job->getJobId(), success); + } + + // No longer need these here. Executive should still have copies. + _jobs.clear(); +} + +/// Retrieve and process a result file using the file-based protocol +/// Uses a copy of JobQuery::Ptr instead of _jobQuery as a call to cancel() would reset _jobQuery. 
+json UberJob::importResultFile(string const& fileUrl, uint64_t rowCount, uint64_t fileSize) { + LOGS(_log, LOG_LVL_DEBUG, + cName(__func__) << " fileUrl=" << fileUrl << " rowCount=" << rowCount << " fileSize=" << fileSize); + + if (isQueryCancelled()) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " import job was cancelled."); + return _importResultError(true, "cancelled", "Query cancelled"); + } + + auto exec = _executive.lock(); + if (exec == nullptr || exec->getCancelled()) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) + " no executive or cancelled"); + return _importResultError(true, "cancelled", "Query cancelled - no executive"); + } + + if (exec->isLimitRowComplete()) { + int dataIgnored = exec->incrDataIgnoredCount(); + if ((dataIgnored - 1) % 1000 == 0) { + LOGS(_log, LOG_LVL_INFO, + "UberJob ignoring, enough rows already " + << "dataIgnored=" << dataIgnored); + } + return _importResultError(false, "rowLimited", "Enough rows already"); + } + + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " fileSize=" << fileSize); + + bool const statusSet = setStatusIfOk(qmeta::JobStatus::RESPONSE_READY, getIdStr() + " " + fileUrl); + if (!statusSet) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " setStatusFail could not set status to RESPONSE_READY"); + return _importResultError(false, "setStatusFail", "could not set status to RESPONSE_READY"); + } + + JobBase::Ptr jBaseThis = shared_from_this(); + weak_ptr ujThis = std::dynamic_pointer_cast(jBaseThis); + + // TODO:UJ lambda may not be the best way to do this, alsocheck synchronization - may need a mutex for + // merging. 
+ auto fileCollectFunc = [ujThis, fileUrl, rowCount](util::CmdData*) { + auto ujPtr = ujThis.lock(); + if (ujPtr == nullptr) { + LOGS(_log, LOG_LVL_DEBUG, + "UberJob::importResultFile::fileCollectFunction uberjob ptr is null " << fileUrl); + return; + } + uint64_t resultRows = 0; + auto [flushSuccess, flushShouldCancel] = + ujPtr->getRespHandler()->flushHttp(fileUrl, rowCount, resultRows); + LOGS(_log, LOG_LVL_DEBUG, ujPtr->cName(__func__) << "::fileCollectFunc"); + if (!flushSuccess) { + // This would probably indicate malformed file+rowCount or + // writing the result table failed. + ujPtr->_importResultError(flushShouldCancel, "mergeError", "merging failed"); + } + + // At this point all data for this job have been read, there's no point in + // having XrdSsi wait for anything. + ujPtr->_importResultFinish(resultRows); + }; + + auto cmd = qdisp::PriorityCommand::Ptr(new qdisp::PriorityCommand(fileCollectFunc)); + exec->queueFileCollect(cmd); + + // If the query meets the limit row complete complete criteria, it will start + // squashing superfluous results so the answer can be returned quickly. 
+ + json jsRet = {{"success", 1}, {"errortype", ""}, {"note", "queued for collection"}}; + return jsRet; +} + +json UberJob::workerError(int errorCode, string const& errorMsg) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " errcode=" << errorCode << " errmsg=" << errorMsg); + + bool const deleteData = true; + bool const keepData = !deleteData; + auto exec = _executive.lock(); + if (exec == nullptr || isQueryCancelled()) { + LOGS(_log, LOG_LVL_WARN, cName(__func__) << " no executive or cancelled"); + return _workerErrorFinish(deleteData, "cancelled"); + } + + if (exec->isLimitRowComplete()) { + int dataIgnored = exec->incrDataIgnoredCount(); + if ((dataIgnored - 1) % 1000 == 0) { + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " ignoring, enough rows already " + << "dataIgnored=" << dataIgnored); + } + return _workerErrorFinish(keepData, "none", "limitRowComplete"); + } + + // Currently there are no detectable recoverable errors from workers. The only + // error that a worker could send back that may possibly be recoverable would + // be a missing table error, which is not trivial to detect. A worker local + // database error may also qualify. + // TODO:UJ see if recoverable errors can be detected on the workers, or + // maybe allow a single retry before sending the error back to the user? + bool recoverableError = false; + recoverableError = true; // TODO:UJ delete after testing + if (recoverableError) { + // The czar should have new maps before the the new UberJob(s) for + // these Jobs are created. (see Czar::_monitor) + _unassignJobs(); + } else { + // Get the error message to the user and kill the user query. 
+ int errState = util::ErrorCode::MYSQLEXEC; + getRespHandler()->flushHttpError(errorCode, errorMsg, errState); + exec->addMultiError(errorCode, errorMsg, errState); + exec->squash(); + } + + string errType = to_string(errorCode) + ":" + errorMsg; + return _workerErrorFinish(deleteData, errType, ""); +} + +json UberJob::_importResultError(bool shouldCancel, string const& errorType, string const& note) { + json jsRet = {{"success", 0}, {"errortype", errorType}, {"note", note}}; + // In all cases, the worker should delete the file as this czar will not ask for it. + + auto exec = _executive.lock(); + if (exec != nullptr) { + LOGS(_log, LOG_LVL_ERROR, + cName(__func__) << " shouldCancel=" << shouldCancel << " errorType=" << errorType << " " + << note); + if (shouldCancel) { + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " failing jobs"); + callMarkCompleteFunc(false); // all jobs failed, no retry + exec->squash(); + } else { + /// - each JobQuery in _jobs needs to be flagged as needing to be + /// put in an UberJob and it's attempt count increased and checked + /// against the attempt limit. + /// - executive needs to be told to make new UberJobs until all + /// JobQueries are being handled by an UberJob. + LOGS(_log, LOG_LVL_ERROR, cName(__func__) << " reassigning jobs"); + _unassignJobs(); + exec->assignJobsToUberJobs(); + } + } else { + LOGS(_log, LOG_LVL_INFO, + cName(__func__) << " already cancelled shouldCancel=" << shouldCancel + << " errorType=" << errorType << " " << note); + } + return jsRet; +} + +nlohmann::json UberJob::_importResultFinish(uint64_t resultRows) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " start"); + /// If this is called, the file has been collected and the worker should delete it + /// + /// This function should call markComplete for all jobs in the uberjob + /// and return a "success:1" json message to be sent to the worker. 
+ bool const statusSet = + setStatusIfOk(qmeta::JobStatus::RESPONSE_DONE, getIdStr() + " _importResultFinish"); + if (!statusSet) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " failed to set status " << getIdStr()); + return {{"success", 0}, {"errortype", "statusMismatch"}, {"note", "failed to set status"}}; + } + auto exec = _executive.lock(); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " executive is null"); + return {{"success", 0}, {"errortype", "cancelled"}, {"note", "executive is null"}}; + } + + bool const success = true; + callMarkCompleteFunc(success); // sets status to COMPLETE + exec->addResultRows(resultRows); + exec->checkLimitRowComplete(); + + json jsRet = {{"success", 1}, {"errortype", ""}, {"note", ""}}; + return jsRet; +} + +nlohmann::json UberJob::_workerErrorFinish(bool deleteData, std::string const& errorType, + std::string const& note) { + // If this is called, the file has been collected and the worker should delete it + // + // Should this call markComplete for all jobs in the uberjob??? + // TODO:UJ Only recoverable errors would be: communication failure, or missing table ??? + // Return a "success:1" json message to be sent to the worker. 
+ auto exec = _executive.lock(); + if (exec == nullptr) { + LOGS(_log, LOG_LVL_DEBUG, cName(__func__) << " executive is null"); + return {{"success", 0}, {"errortype", "cancelled"}, {"note", "executive is null"}}; + } + + json jsRet = {{"success", 1}, {"deletedata", deleteData}, {"errortype", ""}, {"note", ""}}; + return jsRet; +} + +std::ostream& UberJob::dumpOS(std::ostream& os) const { + os << "(jobs sz=" << _jobs.size() << "("; + lock_guard lockJobsMtx(_jobsMtx); + for (auto const& job : _jobs) { + JobDescription::Ptr desc = job->getDescription(); + ResourceUnit ru = desc->resource(); + os << ru.db() << ":" << ru.chunk() << ","; + } + os << "))"; + return os; +} + +}}} // namespace lsst::qserv::qdisp diff --git a/src/qdisp/UberJob.h b/src/qdisp/UberJob.h new file mode 100644 index 0000000000..0dd2f69cdc --- /dev/null +++ b/src/qdisp/UberJob.h @@ -0,0 +1,173 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . + */ +#ifndef LSST_QSERV_QDISP_UBERJOB_H +#define LSST_QSERV_QDISP_UBERJOB_H + +// System headers + +// Qserv headers +#include "qmeta/types.h" +#include "czar/CzarChunkMap.h" // Need nested class. TODO:UJ Make non-nested? +#include "czar/CzarRegistry.h" // Need nested class. TODO:UJ Make non-nested? 
+#include "qdisp/JobBase.h" +#include "qmeta/JobStatus.h" + +// This header declarations +namespace lsst::qserv::qdisp { + +class JobQuery; + +class QueryRequest; + +/// This class contains a number of jobs that need to go to the same worker +/// from a single user query, and contact information for the worker. It also holds +/// some information common to all jobs. +/// The UberJob constructs the message to send to the worker and handles collecting +/// and merging the results. +/// When this UberJob completes, all the Jobs it contains are registered as completed. +/// If this UberJob fails, it will be destroyed, un-assigning all of its Jobs. +/// Those Jobs will need to be reassigned to new UberJobs, or the query cancelled. +class UberJob : public JobBase { +public: + using Ptr = std::shared_ptr; + + static Ptr create(std::shared_ptr const& executive, + std::shared_ptr const& respHandler, int queryId, int uberJobId, + qmeta::CzarId czarId, czar::CzarChunkMap::WorkerChunksData::Ptr const& workerData); + + UberJob() = delete; + UberJob(UberJob const&) = delete; + UberJob& operator=(UberJob const&) = delete; + + virtual ~UberJob(){}; + + bool addJob(std::shared_ptr const& job); + bool runUberJob(); + + std::string cName(const char* funcN) const { return std::string("UberJob::") + funcN + " " + getIdStr(); } + + QueryId getQueryId() const override { return _queryId; } + UberJobId getJobId() const override { + return _uberJobId; + } // TODO:UJ change name when JobBase no longer needed. + std::string const& getIdStr() const override { return _idStr; } + std::shared_ptr getQdispPool() override { return _qdispPool; } // TODO:UJ relocate to JobBase + std::string const& getPayload() const override { return _payload; } // TODO:UJ delete when possible.
+ std::shared_ptr getRespHandler() override { return _respHandler; } + std::shared_ptr getStatus() override { + return _jobStatus; + } // TODO:UJ relocate to JobBase + bool getScanInteractive() const override { return false; } ///< UberJobs are never interactive. + bool isQueryCancelled() override; // TODO:UJ relocate to JobBase + void callMarkCompleteFunc(bool success) override; ///< call markComplete for all jobs in this UberJob. + std::shared_ptr getExecutive() override { return _executive.lock(); } + + void setQueryRequest(std::shared_ptr const& qr) override { + ; // Do nothing as QueryRequest is only needed for xrootd. TODO:UJ delete function. + } + + /// Return false if not ok to set the status to newState, otherwise set the state for + /// this UberJob and all jobs it contains to newState. + /// This is used both to set status and prevent the system from repeating operations + /// that have already happened. If it returns false, the thread calling this + /// should stop processing. + bool setStatusIfOk(qmeta::JobStatus::State newState, std::string const& msg) { + std::lock_guard jobLock(_jobsMtx); + return _setStatusIfOk(newState, msg); + } + + int getJobCount() const { return _jobs.size(); } + + /// TODO:UJ may not need, + void prepScrubResults(); + + /// Set the worker information needed to send messages to the worker believed to + /// be responsible for the chunks handled in this UberJob. + void setWorkerContactInfo(czar::CzarRegistry::WorkerContactInfo::Ptr const& wContactInfo) { + _wContactInfo = wContactInfo; + } + + /// Get the data for the worker that should handle this UberJob. + czar::CzarChunkMap::WorkerChunksData::Ptr getWorkerData() { return _workerData; } + + /// Collect and merge the results from the worker. + nlohmann::json importResultFile(std::string const& fileUrl, uint64_t rowCount, uint64_t fileSize); + + /// Handle an error from the worker. 
+ nlohmann::json workerError(int errorCode, std::string const& errorMsg); + + std::ostream& dumpOS(std::ostream& os) const override; + +private: + UberJob(std::shared_ptr const& executive, std::shared_ptr const& respHandler, + int queryId, int uberJobId, qmeta::CzarId czarId, + czar::CzarChunkMap::WorkerChunksData::Ptr const& workerData); + + /// Used to setup elements that can't be done in the constructor. + void _setup(); + + /// @see setStatusIfOk + /// note: _jobsMtx must be locked before calling. + bool _setStatusIfOk(qmeta::JobStatus::State newState, std::string const& msg); + + /// unassign all Jobs in this UberJob and set the Executive flag to indicate that Jobs need + /// reassignment. + void _unassignJobs(); + + /// Import an error from trying to collect results. + /// TODO:UJ The strings for errorType should have a centralized location in the code - global or util + nlohmann::json _importResultError(bool shouldCancel, std::string const& errorType, + std::string const& note); + + /// Let the executive know that all Jobs in UberJob are complete. + nlohmann::json _importResultFinish(uint64_t resultRows); + + /// Let the Executive know about errors while handling results. + nlohmann::json _workerErrorFinish(bool deleteData, std::string const& errorType = std::string(), + std::string const& note = std::string()); + + std::vector> _jobs; ///< List of Jobs in this UberJob. + mutable std::mutex _jobsMtx; ///< Protects _jobs, _jobStatus + std::atomic _started{false}; + qmeta::JobStatus::Ptr _jobStatus{new qmeta::JobStatus()}; // TODO:UJ Maybe the JobStatus class should be + // changed to better represent UberJobs + + std::string _payload; ///< XrdSsi message to be sent to the _workerResource. TODO:UJ remove when possible + + std::weak_ptr _executive; + std::shared_ptr _respHandler; + QueryId const _queryId; + UberJobId const _uberJobId; + qmeta::CzarId const _czarId; + + std::string const _idStr; + std::shared_ptr _qdispPool; // TODO:UJ remove when possible.
+ + // Map of workerData + czar::CzarChunkMap::WorkerChunksData::Ptr _workerData; // TODO:UJ this may not be needed + + // Contact information for the target worker. + czar::CzarRegistry::WorkerContactInfo::Ptr _wContactInfo; +}; + +} // namespace lsst::qserv::qdisp + +#endif // LSST_QSERV_QDISP_UBERJOB_H diff --git a/src/qdisp/testQDisp.cc b/src/qdisp/testQDisp.cc index 45b44fc6c1..74483ab395 100644 --- a/src/qdisp/testQDisp.cc +++ b/src/qdisp/testQDisp.cc @@ -40,10 +40,10 @@ #include "global/ResourceUnit.h" #include "qdisp/Executive.h" #include "qdisp/JobQuery.h" -#include "qdisp/MessageStore.h" #include "qdisp/QueryRequest.h" #include "qdisp/SharedResources.h" #include "qdisp/XrdSsiMocks.h" +#include "qmeta/MessageStore.h" #include "qproc/ChunkQuerySpec.h" #include "qproc/TaskMsgFactory.h" #include "util/threadSafe.h" @@ -70,7 +70,15 @@ class MockTaskMsgFactory : public TaskMsgFactory { int attemptCount, qmeta::CzarId czarId, std::ostream& os) override { os << mockPayload; } + + std::shared_ptr makeMsgJson(ChunkQuerySpec const& s, std::string const& chunkResultName, + QueryId queryId, int jobId, int attemptCount, + qmeta::CzarId czarId) override { + return jsPtr; + } + std::string mockPayload; + std::shared_ptr jsPtr; }; } // namespace lsst::qserv::qproc @@ -152,7 +160,7 @@ class SetupTest { std::string qrMsg; std::string str; qdisp::ExecutiveConfig::Ptr conf; - std::shared_ptr ms; + std::shared_ptr ms; qdisp::QdispPool::Ptr qdispPool; qdisp::SharedResources::Ptr sharedResources; qdisp::Executive::Ptr ex; @@ -164,7 +172,7 @@ class SetupTest { qdisp::XrdSsiServiceMock::Reset(); str = qdisp::ExecutiveConfig::getMockStr(); conf = std::make_shared(str, 0); // No updating of QMeta. 
- ms = std::make_shared(); + ms = std::make_shared(); qdispPool = std::make_shared(true); sharedResources = qdisp::SharedResources::create(qdispPool); @@ -204,7 +212,7 @@ BOOST_AUTO_TEST_CASE(Executive) { LOGS_DEBUG("jobs=1"); tEnv.ex->join(); LOGS_DEBUG("Executive single query test checking"); - BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qdisp::JobStatus::COMPLETE); + BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qmeta::JobStatus::COMPLETE); BOOST_CHECK(tEnv.ex->getEmpty() == true); } @@ -249,7 +257,7 @@ BOOST_AUTO_TEST_CASE(Executive) { BOOST_AUTO_TEST_CASE(MessageStore) { LOGS_DEBUG("MessageStore test start"); - qdisp::MessageStore ms; + qmeta::MessageStore ms; BOOST_CHECK(ms.messageCount() == 0); ms.addMessage(123, "EXECUTIVE", 456, "test1"); std::string str("test2"); @@ -257,7 +265,7 @@ BOOST_AUTO_TEST_CASE(MessageStore) { ms.addMessage(86, "EXECUTIVE", -12, "test3"); BOOST_CHECK(ms.messageCount() == 3); BOOST_CHECK(ms.messageCount(-12) == 2); - qdisp::QueryMessage qm = ms.getMessage(1); + qmeta::QueryMessage qm = ms.getMessage(1); BOOST_CHECK(qm.chunkId == 124 && qm.code == -12 && str.compare(qm.description) == 0); LOGS_DEBUG("MessageStore test end"); } @@ -271,7 +279,7 @@ BOOST_AUTO_TEST_CASE(QueryRequest) { SequentialInt sequence(0); tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); tEnv.ex->join(); - BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qdisp::JobStatus::RESULT_ERROR); + BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qmeta::JobStatus::RESULT_ERROR); BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() > 1); // Retried, eh? 
BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() == qdisp::XrdSsiServiceMock::getReqCount()); } @@ -284,7 +292,7 @@ BOOST_AUTO_TEST_CASE(QueryRequest) { SequentialInt sequence(0); tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); tEnv.ex->join(); - BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qdisp::JobStatus::RESULT_ERROR); + BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qmeta::JobStatus::RESULT_ERROR); BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() == 1); } @@ -297,7 +305,7 @@ BOOST_AUTO_TEST_CASE(QueryRequest) { tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); tEnv.ex->join(); LOGS_DEBUG("tEnv.jqTest->...state = " << tEnv.jqTest->getStatus()->getInfo().state); - BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qdisp::JobStatus::RESULT_ERROR); + BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == qmeta::JobStatus::RESULT_ERROR); BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() == 1); // No retries! 
} @@ -314,7 +322,7 @@ BOOST_AUTO_TEST_CASE(QueryRequest) { tEnv.jqTest = executiveTest(tEnv.ex, sequence, chunkId, tEnv.qrMsg, 1); tEnv.ex->join(); BOOST_CHECK(tEnv.jqTest->getStatus()->getInfo().state == - qdisp::JobStatus::COMPLETE); + qmeta::JobStatus::COMPLETE); BOOST_CHECK(qdisp::XrdSsiServiceMock::getFinCount() == 1); } */ diff --git a/src/qmeta/CMakeLists.txt b/src/qmeta/CMakeLists.txt index faea86ad69..9c4527ef9c 100644 --- a/src/qmeta/CMakeLists.txt +++ b/src/qmeta/CMakeLists.txt @@ -1,6 +1,8 @@ add_library(qserv_meta SHARED) target_sources(qserv_meta PRIVATE + JobStatus.cc + MessageStore.cc QMeta.cc QMetaMysql.cc QMetaSelect.cc @@ -10,10 +12,10 @@ target_sources(qserv_meta PRIVATE target_link_libraries(qserv_meta PUBLIC cconfig - qdisp qserv_common log mysqlclient_r + http ) install(TARGETS qserv_meta) diff --git a/src/qdisp/JobStatus.cc b/src/qmeta/JobStatus.cc similarity index 74% rename from src/qdisp/JobStatus.cc rename to src/qmeta/JobStatus.cc index 20dad135a3..bf31305499 100644 --- a/src/qdisp/JobStatus.cc +++ b/src/qmeta/JobStatus.cc @@ -33,7 +33,7 @@ */ // Class header -#include "qdisp/JobStatus.h" +#include "qmeta/JobStatus.h" // System headers #include @@ -44,18 +44,43 @@ #include "lsst/log/Log.h" namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.JobStatus"); +LOG_LOGGER _log = LOG_GET("lsst.qserv.qmeta.JobStatus"); } -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { JobStatus::Info::Info() : state(UNKNOWN), stateCode(0) { stateTime = getNow(); } void JobStatus::updateInfo(std::string const& idMsg, JobStatus::State s, std::string const& source, int code, std::string const& desc, MessageSeverity severity) { std::lock_guard lock(_mutex); + _updateInfo(idMsg, s, source, code, desc, severity); +} + +void JobStatus::_updateInfo(std::string const& idMsg, JobStatus::State s, std::string const& source, int code, + std::string const& desc, MessageSeverity severity) { + LOGS(_log, LOG_LVL_DEBUG, + idMsg << " Updating state to: " << s 
<< " code=" << code << " " << desc << " src=" << source); + _info.stateTime = getNow(); + _info.state = s; + _info.stateCode = code; + _info.stateDesc = desc; + _info.source = source; + _info.severity = severity; +} + +void JobStatus::updateInfoNoErrorOverwrite(std::string const& idMsg, JobStatus::State s, + std::string const& source, int code, std::string const& desc, + MessageSeverity severity) { + std::lock_guard lock(_mutex); + auto jState = _info.state; + if (jState != qmeta::JobStatus::CANCEL && jState != qmeta::JobStatus::RESPONSE_ERROR && + jState != qmeta::JobStatus::RESULT_ERROR && jState != qmeta::JobStatus::MERGE_ERROR) { + _updateInfo(idMsg, s, source, code, desc, severity); + } - LOGS(_log, LOG_LVL_DEBUG, idMsg << " Updating state to: " << s << " code=" << code << " " << desc); + LOGS(_log, LOG_LVL_DEBUG, + idMsg << " Updating state to: " << s << " code=" << code << " " << desc << " src=" << source); _info.stateTime = getNow(); _info.state = s; _info.stateCode = code; @@ -140,4 +165,4 @@ std::ostream& operator<<(std::ostream& os, JobStatus::Info const& info) { return os; } -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta diff --git a/src/qdisp/JobStatus.h b/src/qmeta/JobStatus.h similarity index 79% rename from src/qdisp/JobStatus.h rename to src/qmeta/JobStatus.h index d693921001..89ecda0c84 100644 --- a/src/qdisp/JobStatus.h +++ b/src/qmeta/JobStatus.h @@ -20,8 +20,8 @@ * the GNU General Public License along with this program. If not, * see . */ -#ifndef LSST_QSERV_QDISP_JOBSTATUS_H -#define LSST_QSERV_QDISP_JOBSTATUS_H +#ifndef LSST_QSERV_QMETA_JOBSTATUS_H +#define LSST_QSERV_QMETA_JOBSTATUS_H // System headers #include @@ -34,9 +34,9 @@ // qserv headers #include "global/constants.h" -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { -/** Monitor execution of a chunk query against an SSI ressource +/** Monitor execution of a chunk query. * * JobStatus instances receive timestamped reports of execution State. 
This * allows a manager object to receive updates on status without exposing its @@ -61,13 +61,14 @@ class JobStatus { UNKNOWN = 0, REQUEST = 1203, RESPONSE_READY, - RESPONSE_ERROR, RESPONSE_DATA, RESPONSE_DATA_NACK, RESPONSE_DONE, + CANCEL, + RESPONSE_ERROR, // Errors must be between CANCEL and COMPLETE RESULT_ERROR, MERGE_ERROR, - CANCEL, + RETRY_ERROR, COMPLETE = 2000 }; @@ -96,6 +97,13 @@ class JobStatus { void updateInfo(std::string const& idMsg, State s, std::string const& source, int code = 0, std::string const& desc = "", MessageSeverity severity = MSG_INFO); + /// Same as updateInfo() except existing error states are not overwritten. + /// @see updateInfo() + /// @return Negative values indicate the status was changed, zero and positive values + void updateInfoNoErrorOverwrite(std::string const& idMsg, State s, std::string const& source, + int code = 0, std::string const& desc = "", + MessageSeverity severity = MSG_INFO); + struct Info { Info(); // More detailed debugging may store a vector of states, appending @@ -116,11 +124,21 @@ class JobStatus { return _info; } + State getState() const { + std::lock_guard lock(_mutex); + return _info.state; + } + static std::string stateStr(JobStatus::State const& state); friend std::ostream& operator<<(std::ostream& os, JobStatus const& es); private: + /// @see updateInfo() + /// note: _mutex must be held before calling. 
+ void _updateInfo(std::string const& idMsg, JobStatus::State s, std::string const& source, int code, + std::string const& desc, MessageSeverity severity); + Info _info; mutable std::mutex _mutex; ///< Mutex to guard concurrent updates }; @@ -128,6 +146,6 @@ std::ostream& operator<<(std::ostream& os, JobStatus const& es); std::ostream& operator<<(std::ostream& os, JobStatus::Info const& inf); std::ostream& operator<<(std::ostream& os, JobStatus::State const& state); -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta -#endif // LSST_QSERV_QDISP_JOBSTATUS_H +#endif // LSST_QSERV_QMETA_JOBSTATUS_H diff --git a/src/qdisp/MessageStore.cc b/src/qmeta/MessageStore.cc similarity index 87% rename from src/qdisp/MessageStore.cc rename to src/qmeta/MessageStore.cc index 784dd847b3..e4e32fe746 100644 --- a/src/qdisp/MessageStore.cc +++ b/src/qmeta/MessageStore.cc @@ -23,7 +23,7 @@ // See MessageStore.h // Class header -#include "qdisp/MessageStore.h" +#include "qmeta/MessageStore.h" // System headers #include @@ -36,13 +36,12 @@ // Qserv headers #include "global/constants.h" -#include "qdisp/JobStatus.h" namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.qdisp.MessageStore"); +LOG_LOGGER _log = LOG_GET("lsst.qserv.qmeta.MessageStore"); } -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { //////////////////////////////////////////////////////////////////////// // public @@ -50,9 +49,9 @@ namespace lsst::qserv::qdisp { void MessageStore::addMessage(int chunkId, std::string const& msgSource, int code, std::string const& description, MessageSeverity severity, - JobStatus::TimeType timestamp) { - if (timestamp == JobStatus::TimeType()) { - timestamp = JobStatus::getNow(); + qmeta::JobStatus::TimeType timestamp) { + if (timestamp == qmeta::JobStatus::TimeType()) { + timestamp = qmeta::JobStatus::getNow(); } auto level = code < 0 ?
LOG_LVL_ERROR : LOG_LVL_DEBUG; LOGS(_log, level, "Add msg: " << chunkId << " " << msgSource << " " << code << " " << description); @@ -80,4 +79,4 @@ int MessageStore::messageCount(int code) const { return count; } -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta diff --git a/src/qdisp/MessageStore.h b/src/qmeta/MessageStore.h similarity index 90% rename from src/qdisp/MessageStore.h rename to src/qmeta/MessageStore.h index c42114f01e..7fe9823c55 100644 --- a/src/qdisp/MessageStore.h +++ b/src/qmeta/MessageStore.h @@ -29,8 +29,8 @@ /// The MessageStore classes are responsible for maintaining status and /// error messages associated with a query. -#ifndef LSST_QSERV_QDISP_MESSAGESTORE_H -#define LSST_QSERV_QDISP_MESSAGESTORE_H +#ifndef LSST_QSERV_QMETA_MESSAGESTORE_H +#define LSST_QSERV_QMETA_MESSAGESTORE_H // System headers #include @@ -40,13 +40,13 @@ // Qserv headers #include "global/constants.h" -#include "qdisp/JobStatus.h" +#include "qmeta/JobStatus.h" -namespace lsst::qserv::qdisp { +namespace lsst::qserv::qmeta { struct QueryMessage { QueryMessage(int chunkId_, std::string const& msgSource_, int code_, std::string description_, - JobStatus::TimeType timestamp_, MessageSeverity severity_) + qmeta::JobStatus::TimeType timestamp_, MessageSeverity severity_) : chunkId(chunkId_), msgSource(msgSource_), code(code_), @@ -58,7 +58,7 @@ struct QueryMessage { std::string msgSource; int code; std::string description; - JobStatus::TimeType timestamp; + qmeta::JobStatus::TimeType timestamp; MessageSeverity severity; }; @@ -95,7 +95,7 @@ class MessageStore { */ void addMessage(int chunkId, std::string const& msgSource, int code, std::string const& description, MessageSeverity severity_ = MessageSeverity::MSG_INFO, - JobStatus::TimeType timestamp = JobStatus::TimeType()); + qmeta::JobStatus::TimeType timestamp = qmeta::JobStatus::TimeType()); /** Add an error message to this MessageStore * @@ -117,6 +117,6 @@ class MessageStore { std::vector 
_queryMessages; }; -} // namespace lsst::qserv::qdisp +} // namespace lsst::qserv::qmeta -#endif // LSST_QSERV_QDISP_MESSAGESTORE_H +#endif // LSST_QSERV_QMETA_MESSAGESTORE_H diff --git a/src/qmeta/QMeta.h b/src/qmeta/QMeta.h index a8c3a6672a..d4b066ecbc 100644 --- a/src/qmeta/QMeta.h +++ b/src/qmeta/QMeta.h @@ -38,14 +38,57 @@ #include "qmeta/types.h" namespace lsst::qserv::qdisp { -class MessageStore; class QueryMessage; } // namespace lsst::qserv::qdisp namespace lsst::qserv::qmeta { +class MessageStore; + /// @addtogroup qmeta +/** + * The structure ChunkMap encapsulates a disposition of chunks at Qserv workers + * along with a time when the map was updated. + * + * Here is an example on how to using the map for getting info on all chunks in + * the given context: + * @code + * std::string const worker = "worker-001"; + * std::string const database = "LSST-DR01"; + * std::string const table = "Object"; + * + * ChunkMap const& chunkMap = ...; + * for (auto const& [chunk, size] : chunkMap[worker][database][table]) { + * ... + * } + * @endcode + */ +struct QMetaChunkMap { + /// @return 'true' if the map is empty (or constructed using the default constructor) + bool empty() const { + return workers.empty() || (std::chrono::time_point() == updateTime); + } + + // NOTE: Separate types were added here for the sake of clarity to avoid + // a definition of the unreadable nested map. + + struct ChunkInfo { + unsigned int chunk = 0; ///< The chunk number + size_t size = 0; ///< The file size (in bytes) of the chunk table + }; + typedef std::vector Chunks; ///< Collection of chunks + typedef std::map Tables; ///< tables-to-chunks + typedef std::map Databases; ///< Databases-to-tables + typedef std::map Workers; ///< Workers-to-databases + + /// The chunk disposition map for all workers. + Workers workers; + + /// The last time the map was updated (since UNIX Epoch). + TIMEPOINT updateTime; +}; + /** * @ingroup qmeta * @brief Interface for query metadata. 
@@ -59,48 +102,6 @@ class QMeta { */ typedef std::vector > TableNames; - /** - * The structure ChunkMap encapsulates a disposition of chunks at Qserv workers - * along with a time when the map was updated. - * - * Here is an example on how to using the map for getting info on all chunks in - * the given context: - * @code - * std::string const worker = "worker-001"; - * std::string const database = "LSST-DR01"; - * std::string const table = "Object"; - * - * ChunkMap const& chunkMap = ...; - * for (auto const& [chunk, size] : chunkMap[worker][database][table]) { - * ... - * } - * @endcode - */ - struct ChunkMap { - /// @return 'true' if the map is empty (or constructed using the default constructor) - bool empty() const { - return workers.empty() || (std::chrono::time_point() == updateTime); - } - - // NOTE: Separate types were added here for the sake of clarity to avoid - // a definition of the unreadable nested map. - - struct ChunkInfo { - unsigned int chunk = 0; ///< The chunk number - size_t size = 0; ///< The file size (in bytes) of the chunk table - }; - typedef std::vector Chunks; ///< Collection of chunks - typedef std::map Tables; ///< tables-to-chunks - typedef std::map Databases; ///< Databases-to-tables - typedef std::map Workers; ///< Workers-to-databases - - /// The chunk disposition map for all workers. - Workers workers; - - /// The last time the map was updated (since UNIX Epoch). - TIMEPOINT updateTime; - }; - /** * Create QMeta instance from configuration dictionary. * @@ -328,7 +329,7 @@ class QMeta { virtual void saveResultQuery(QueryId queryId, std::string const& query) = 0; /// Write messages/errors generated during the query to the QMessages table. - virtual void addQueryMessages(QueryId queryId, std::shared_ptr const& msgStore) = 0; + virtual void addQueryMessages(QueryId queryId, std::shared_ptr const& msgStore) = 0; /** * Fetch the chunk map which was updated after the specified time point. 
@@ -342,8 +343,9 @@ class QMeta { * @throws EmptyTableError if the corresponding metadata table doesn't have any record * @throws SqlError for any other error related to MySQL */ - virtual ChunkMap getChunkMap(std::chrono::time_point const& prevUpdateTime = - std::chrono::time_point()) = 0; + virtual QMetaChunkMap getChunkMap( + std::chrono::time_point const& prevUpdateTime = + std::chrono::time_point()) = 0; protected: // Default constructor diff --git a/src/qmeta/QMetaMysql.cc b/src/qmeta/QMetaMysql.cc index 40befc5e97..3535c66fea 100644 --- a/src/qmeta/QMetaMysql.cc +++ b/src/qmeta/QMetaMysql.cc @@ -36,9 +36,9 @@ // Qserv headers #include "global/stringUtil.h" -#include "qdisp/JobStatus.h" -#include "qdisp/MessageStore.h" #include "qmeta/Exceptions.h" +#include "qmeta/JobStatus.h" +#include "qmeta/MessageStore.h" #include "qmeta/QMetaTransaction.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" @@ -807,14 +807,14 @@ void QMetaMysql::saveResultQuery(QueryId queryId, string const& query) { trans->commit(); } -void QMetaMysql::addQueryMessages(QueryId queryId, shared_ptr const& msgStore) { +void QMetaMysql::addQueryMessages(QueryId queryId, shared_ptr const& msgStore) { int msgCount = msgStore->messageCount(); int cancelCount = 0; int completeCount = 0; int execFailCount = 0; map msgCountMap; for (int i = 0; i != msgCount; ++i) { - qdisp::QueryMessage const& qMsg = msgStore->getMessage(i); + qmeta::QueryMessage const& qMsg = msgStore->getMessage(i); try { _addQueryMessage(queryId, qMsg, cancelCount, completeCount, execFailCount, msgCountMap); } catch (qmeta::SqlError const& ex) { @@ -823,11 +823,11 @@ void QMetaMysql::addQueryMessages(QueryId queryId, shared_ptr 0 || execFailCount > 0) { - qdisp::QueryMessage qm(-1, "CANCELTOTAL", 0, + qmeta::QueryMessage qm(-1, "CANCELTOTAL", 0, string("{\"CANCEL_count\":") + to_string(cancelCount) + ", \"EXECFAIL_count\":" + to_string(execFailCount) + ", \"COMPLETE_count\":" + to_string(completeCount) + "}", 
- qdisp::JobStatus::getNow(), MessageSeverity::MSG_INFO); + qmeta::JobStatus::getNow(), MessageSeverity::MSG_INFO); _addQueryMessage(queryId, qm, cancelCount, completeCount, execFailCount, msgCountMap); } @@ -836,16 +836,16 @@ void QMetaMysql::addQueryMessages(QueryId queryId, shared_ptr const& prevUpdateTime) { +QMetaChunkMap QMetaMysql::getChunkMap(chrono::time_point const& prevUpdateTime) { lock_guard lock(_dbMutex); - QMeta::ChunkMap chunkMap; + QMetaChunkMap chunkMap; auto trans = QMetaTransaction::create(*_conn); @@ -856,7 +856,7 @@ QMeta::ChunkMap QMetaMysql::getChunkMap(chrono::time_point (prevUpdateTime == chrono::time_point()) || (prevUpdateTime < updateTime); if (!force) { trans->commit(); - return QMeta::ChunkMap(); + return QMetaChunkMap(); } // Read the map itself @@ -882,7 +882,10 @@ QMeta::ChunkMap QMetaMysql::getChunkMap(chrono::time_point string const& table = row[2]; unsigned int chunk = lsst::qserv::stoui(row[3]); size_t const size = stoull(row[4]); - chunkMap.workers[worker][database][table].push_back(ChunkMap::ChunkInfo{chunk, size}); + chunkMap.workers[worker][database][table].push_back(QMetaChunkMap::ChunkInfo{chunk, size}); + LOGS(_log, LOG_LVL_TRACE, + "QMetaInsrt{worker=" << worker << " dbN=" << database << " tblN=" << table + << " chunk=" << chunk << " sz=" << size); } chunkMap.updateTime = updateTime; } catch (exception const& ex) { @@ -896,7 +899,8 @@ chrono::time_point QMetaMysql::_getChunkMapUpdateTime(lock sql::SqlErrorObject errObj; sql::SqlResults results; string const tableName = "chunkMapStatus"; - string const query = "SELECT `update_time` FROM `" + tableName + "` ORDER BY `update_time` DESC LIMIT 1"; + string const query = + "SELECT TIME_TO_SEC(`update_time`) FROM `" + tableName + "` ORDER BY `update_time` DESC LIMIT 1"; LOGS(_log, LOG_LVL_DEBUG, "Executing query: " << query); if (!_conn->runQuery(query, results, errObj)) { LOGS(_log, LOG_LVL_ERROR, "query failed: " << query); @@ -920,7 +924,7 @@ chrono::time_point 
QMetaMysql::_getChunkMapUpdateTime(lock } } -void QMetaMysql::_addQueryMessage(QueryId queryId, qdisp::QueryMessage const& qMsg, int& cancelCount, +void QMetaMysql::_addQueryMessage(QueryId queryId, qmeta::QueryMessage const& qMsg, int& cancelCount, int& completeCount, int& execFailCount, map& msgCountMap) { // Don't add duplicate messages. if (qMsg.msgSource == "DUPLICATE") return; @@ -981,7 +985,7 @@ void QMetaMysql::_addQueryMessage(QueryId queryId, qdisp::QueryMessage const& qM query += ", " + to_string(qMsg.code); query += ", \"" + _conn->escapeString(severity) + "\""; query += ", \"" + _conn->escapeString(qMsg.description) + "\""; - query += ", " + to_string(qdisp::JobStatus::timeToInt(qMsg.timestamp)); + query += ", " + to_string(qmeta::JobStatus::timeToInt(qMsg.timestamp)); query += ")"; // run query sql::SqlErrorObject errObj; diff --git a/src/qmeta/QMetaMysql.h b/src/qmeta/QMetaMysql.h index 34def90969..14809702c2 100644 --- a/src/qmeta/QMetaMysql.h +++ b/src/qmeta/QMetaMysql.h @@ -41,6 +41,8 @@ class SqlConnection; namespace lsst::qserv::qmeta { +class QueryMessage; + /// @addtogroup qmeta /** @@ -261,11 +263,11 @@ class QMetaMysql : public QMeta { void saveResultQuery(QueryId queryId, std::string const& query) override; /// @see QMeta::addQueryMessages() - void addQueryMessages(QueryId queryId, std::shared_ptr const& msgStore) override; + void addQueryMessages(QueryId queryId, std::shared_ptr const& msgStore) override; /// @see QMeta::getChunkMap - QMeta::ChunkMap getChunkMap(std::chrono::time_point const& prevUpdateTime = - std::chrono::time_point()) override; + QMetaChunkMap getChunkMap(std::chrono::time_point const& prevUpdateTime = + std::chrono::time_point()) override; protected: /// Check that all necessary tables exist @@ -294,7 +296,7 @@ class QMetaMysql : public QMeta { std::lock_guard const& lock); /// Add qMsg to the permanent message table. 
- void _addQueryMessage(QueryId queryId, qdisp::QueryMessage const& qMsg, int& cancelCount, + void _addQueryMessage(QueryId queryId, qmeta::QueryMessage const& qMsg, int& cancelCount, int& completeCount, int& execFailCount, std::map& msgCountMap); diff --git a/src/qmeta/testQMeta.cc b/src/qmeta/testQMeta.cc index 31cb287b2c..02f6248923 100644 --- a/src/qmeta/testQMeta.cc +++ b/src/qmeta/testQMeta.cc @@ -33,8 +33,9 @@ #include "lsst/log/Log.h" // Qserv headers -#include "QMetaMysql.h" -#include "QStatusMysql.h" +#include "qmeta/MessageStore.h" +#include "qmeta/QMetaMysql.h" +#include "qmeta/QStatusMysql.h" #include "sql/SqlConnection.h" #include "sql/SqlConnectionFactory.h" #include "sql/SqlErrorObject.h" @@ -415,7 +416,7 @@ BOOST_AUTO_TEST_CASE(messWithQueryStats) { BOOST_AUTO_TEST_CASE(getChunkMap) { // The test assumes that the underlying tables exists and it's empty. - QMeta::ChunkMap chunkMap; + QMetaChunkMap chunkMap; BOOST_CHECK_THROW(qMeta->getChunkMap(), EmptyTableError); } diff --git a/src/qproc/TaskMsgFactory.cc b/src/qproc/TaskMsgFactory.cc index ac7a5afe09..8a2d7434dc 100644 --- a/src/qproc/TaskMsgFactory.cc +++ b/src/qproc/TaskMsgFactory.cc @@ -36,6 +36,7 @@ #include // Third-party headers +#include "nlohmann/json.hpp" // LSST headers #include "lsst/log/Log.h" @@ -51,8 +52,70 @@ namespace { LOG_LOGGER _log = LOG_GET("lsst.qserv.qproc.TaskMsgFactory"); } +using namespace std; + namespace lsst::qserv::qproc { +// TODO:UJ - Probaly just delete this +bool TaskMsgFactory::fillTaskMsg(proto::TaskMsg* taskMsg, ChunkQuerySpec const& chunkQuerySpec, + std::string const& chunkResultName, QueryId queryId, int jobId, + int attemptCount, qmeta::CzarId czarId) { + std::string resultTable("Asdfasfd"); + if (!chunkResultName.empty()) { + resultTable = chunkResultName; + } + // shared + taskMsg->set_db(chunkQuerySpec.db); + taskMsg->set_queryid(queryId); + taskMsg->set_jobid(jobId); + taskMsg->set_attemptcount(attemptCount); + taskMsg->set_czarid(czarId); + + // 
scanTables (for shared scans) + // check if more than 1 db in scanInfo + std::string db; + for (auto const& sTbl : chunkQuerySpec.scanInfo.infoTables) { + if (db.empty()) { + db = sTbl.db; + } + } + + for (auto const& sTbl : chunkQuerySpec.scanInfo.infoTables) { + lsst::qserv::proto::TaskMsg_ScanTable* msgScanTbl = taskMsg->add_scantable(); + sTbl.copyToScanTable(msgScanTbl); + } + + taskMsg->set_scanpriority(chunkQuerySpec.scanInfo.scanRating); + taskMsg->set_scaninteractive(chunkQuerySpec.scanInteractive); + + // per-chunk + taskMsg->set_chunkid(chunkQuerySpec.chunkId); + // per-fragment + // TODO refactor to simplify + if (chunkQuerySpec.nextFragment.get()) { + ChunkQuerySpec const* sPtr = &chunkQuerySpec; + while (sPtr) { + LOGS(_log, LOG_LVL_TRACE, "nextFragment"); + for (unsigned int t = 0; t < (sPtr->queries).size(); t++) { + LOGS(_log, LOG_LVL_TRACE, (sPtr->queries).at(t)); + } + // Linked fragments will not have valid subChunkTables vectors, + // So, we reuse the root fragment's vector. 
+ _addFragment(*taskMsg, resultTable, chunkQuerySpec.subChunkTables, sPtr->subChunkIds, + sPtr->queries); + sPtr = sPtr->nextFragment.get(); + } + } else { + LOGS(_log, LOG_LVL_TRACE, "no nextFragment"); + for (unsigned int t = 0; t < (chunkQuerySpec.queries).size(); t++) { + LOGS(_log, LOG_LVL_TRACE, (chunkQuerySpec.queries).at(t)); + } + _addFragment(*taskMsg, resultTable, chunkQuerySpec.subChunkTables, chunkQuerySpec.subChunkIds, + chunkQuerySpec.queries); + } + return true; +} + std::shared_ptr TaskMsgFactory::_makeMsg(ChunkQuerySpec const& chunkQuerySpec, std::string const& chunkResultName, QueryId queryId, int jobId, int attemptCount, qmeta::CzarId czarId) { @@ -147,4 +210,99 @@ void TaskMsgFactory::serializeMsg(ChunkQuerySpec const& s, std::string const& ch m->SerializeToOstream(&os); } +std::shared_ptr TaskMsgFactory::makeMsgJson(ChunkQuerySpec const& chunkQuerySpec, + std::string const& chunkResultName, + QueryId queryId, int jobId, int attemptCount, + qmeta::CzarId czarId) { + std::string resultTable("Asdfasfd"); + if (!chunkResultName.empty()) { + resultTable = chunkResultName; + } + + // TODO:UJ verify that these can be put in the uberjob to reduce duplicates + // and the size of the message. 
+ auto jsJobMsgPtr = std::shared_ptr( + new nlohmann::json({{"czarId", czarId}, + {"queryId", queryId}, + {"jobId", jobId}, + {"attemptCount", attemptCount}, + {"querySpecDb", chunkQuerySpec.db}, + {"scanPriority", chunkQuerySpec.scanInfo.scanRating}, + {"scanInteractive", chunkQuerySpec.scanInteractive}, + {"maxTableSize", (cconfig::CzarConfig::instance()->getMaxTableSizeMB())}, + {"chunkScanTables", nlohmann::json::array()}, + {"chunkId", chunkQuerySpec.chunkId}, + {"queryFragments", nlohmann::json::array()}})); + + auto& jsJobMsg = *jsJobMsgPtr; + + auto& chunkScanTables = jsJobMsg["chunkScanTables"]; + for (auto const& sTbl : chunkQuerySpec.scanInfo.infoTables) { + nlohmann::json cst = {{"db", sTbl.db}, + {"table", sTbl.table}, + {"lockInMemory", sTbl.lockInMemory}, + {"tblScanRating", sTbl.scanRating}}; + chunkScanTables.push_back(move(cst)); + } + + auto& jsFragments = jsJobMsg["queryFragments"]; + if (chunkQuerySpec.nextFragment.get()) { + ChunkQuerySpec const* sPtr = &chunkQuerySpec; + while (sPtr) { + LOGS(_log, LOG_LVL_TRACE, "nextFragment"); + for (unsigned int t = 0; t < (sPtr->queries).size(); t++) { + LOGS(_log, LOG_LVL_DEBUG, __func__ << " q=" << (sPtr->queries).at(t)); + } + for (auto const& sbi : sPtr->subChunkIds) { + LOGS(_log, LOG_LVL_DEBUG, __func__ << " sbi=" << sbi); + } + // Linked fragments will not have valid subChunkTables vectors, + // So, we reuse the root fragment's vector. 
+ _addFragmentJson(jsFragments, resultTable, chunkQuerySpec.subChunkTables, sPtr->subChunkIds, + sPtr->queries); + sPtr = sPtr->nextFragment.get(); + } + } else { + LOGS(_log, LOG_LVL_TRACE, "no nextFragment"); + for (unsigned int t = 0; t < (chunkQuerySpec.queries).size(); t++) { + LOGS(_log, LOG_LVL_TRACE, (chunkQuerySpec.queries).at(t)); + } + _addFragmentJson(jsFragments, resultTable, chunkQuerySpec.subChunkTables, chunkQuerySpec.subChunkIds, + chunkQuerySpec.queries); + } + + return jsJobMsgPtr; +} + +void TaskMsgFactory::_addFragmentJson(nlohmann::json& jsFragments, std::string const& resultName, + DbTableSet const& subChunkTables, std::vector const& subchunkIds, + std::vector const& queries) { + nlohmann::json jsFrag = {{"resultTable", resultName}, + {"queries", nlohmann::json::array()}, + {"subchunkTables", nlohmann::json::array()}, + {"subchunkIds", nlohmann::json::array()}}; + + auto& jsQueries = jsFrag["queries"]; + for (auto& qry : queries) { + nlohmann::json jsQry = {{"subQuery", qry}}; + jsQueries.push_back(move(jsQry)); + } + + // Add the db+table pairs to the subchunk. + auto& jsSubchunkTables = jsFrag["subchunkTables"]; + for (auto& tbl : subChunkTables) { + nlohmann::json jsSubchunkTbl = {{"scDb", tbl.db}, {"scTable", tbl.table}}; + jsSubchunkTables.push_back(move(jsSubchunkTbl)); + LOGS(_log, LOG_LVL_TRACE, "added dbtbl=" << tbl.db << "." 
<< tbl.table); + } + + // Add subchunk id numbers + auto& jsSubchunkIds = jsFrag["subchunkIds"]; + for (auto& subchunkId : subchunkIds) { + jsSubchunkIds.push_back(subchunkId); + } + + jsFragments.push_back(move(jsFrag)); +} + } // namespace lsst::qserv::qproc diff --git a/src/qproc/TaskMsgFactory.h b/src/qproc/TaskMsgFactory.h index dc2d0ed130..d770d2c5c4 100644 --- a/src/qproc/TaskMsgFactory.h +++ b/src/qproc/TaskMsgFactory.h @@ -35,6 +35,9 @@ #include #include +// Third party headers +#include "nlohmann/json.hpp" + // Qserv headers #include "global/DbTable.h" #include "global/intTypes.h" @@ -58,14 +61,31 @@ class TaskMsgFactory { virtual void serializeMsg(ChunkQuerySpec const& s, std::string const& chunkResultName, QueryId queryId, int jobId, int attemptCount, qmeta::CzarId czarId, std::ostream& os); + /// Use the provided information to fill in taskMsg. + /// @return true if successful. + bool fillTaskMsg(proto::TaskMsg* taskMsg, ChunkQuerySpec const& s, std::string const& chunkResultName, + QueryId queryId, int jobId, int attemptCount, qmeta::CzarId czarId); + + /// Make and return the json message for a single Job. + virtual std::shared_ptr makeMsgJson(ChunkQuerySpec const& s, + std::string const& chunkResultName, QueryId queryId, + int jobId, int attemptCount, qmeta::CzarId czarId); + private: + // TODO:UJ delete when possible std::shared_ptr _makeMsg(ChunkQuerySpec const& s, std::string const& chunkResultName, QueryId queryId, int jobId, int attemptCount, qmeta::CzarId czarId); + // TODO:UJ delete when possible void _addFragment(proto::TaskMsg& taskMsg, std::string const& resultName, DbTableSet const& subChunkTables, std::vector const& subChunkIds, std::vector const& queries); + + /// Make a json message for a single fragment. 
+ void _addFragmentJson(nlohmann::json& jsFragments, std::string const& resultName, + DbTableSet const& subChunkTables, std::vector const& subChunkIds, + std::vector const& queries); }; } // namespace lsst::qserv::qproc diff --git a/src/rproc/InfileMerger.cc b/src/rproc/InfileMerger.cc index 00a713950f..11cb77cdd2 100644 --- a/src/rproc/InfileMerger.cc +++ b/src/rproc/InfileMerger.cc @@ -61,6 +61,7 @@ #include "qdisp/CzarStats.h" #include "qdisp/Executive.h" #include "qdisp/JobQuery.h" +#include "qdisp/UberJob.h" #include "qproc/DatabaseModels.h" #include "query/ColumnRef.h" #include "query/SelectStmt.h" @@ -217,13 +218,13 @@ void InfileMerger::_setQueryIdStr(std::string const& qIdStr) { void InfileMerger::mergeCompleteFor(int jobId) { std::lock_guard resultSzLock(_mtxResultSizeMtx); - _totalResultSize += _perJobResultSize[jobId]; + _totalResultSize += _perJobResultSize[jobId]; // TODO:UJ this can probably be simplified } bool InfileMerger::merge(proto::ResponseSummary const& responseSummary, proto::ResponseData const& responseData, std::shared_ptr const& jq) { - int const jobId = responseSummary.jobid(); + JobId const jobId = responseSummary.jobid(); std::string queryIdJobStr = QueryIdHelper::makeIdStr(responseSummary.queryid(), jobId); if (!_queryIdStrSet) { _setQueryIdStr(QueryIdHelper::makeIdStr(responseSummary.queryid())); @@ -332,6 +333,117 @@ bool InfileMerger::merge(proto::ResponseSummary const& responseSummary, return ret; } +bool InfileMerger::mergeHttp(qdisp::UberJob::Ptr const& uberJob, proto::ResponseData const& responseData) { + UberJobId const uJobId = uberJob->getJobId(); + std::string queryIdJobStr = uberJob->getIdStr(); + if (!_queryIdStrSet) { + _setQueryIdStr(QueryIdHelper::makeIdStr(uberJob->getQueryId())); + } + + // Nothing to do if size is zero. + if (responseData.row_size() == 0) { + return true; + } + + // Do nothing if the query got cancelled for any reason. 
+ if (uberJob->isQueryCancelled()) { + return true; + } + auto executive = uberJob->getExecutive(); + if (executive == nullptr || executive->getCancelled() || executive->isLimitRowComplete()) { + return true; + } + + std::unique_ptr semaLock; + if (_dbEngine != MYISAM) { + // needed for parallel merging with INNODB and MEMORY + semaLock.reset(new util::SemaLock(*_semaMgrConn)); + } + + TimeCountTracker::CALLBACKFUNC cbf = [](TIMEPOINT start, TIMEPOINT end, double bytes, + bool success) { + if (!success) return; + if (std::chrono::duration const seconds = end - start; seconds.count() > 0) { + qdisp::CzarStats::get()->addXRootDSSIRecvRate(bytes / seconds.count()); + } + }; + auto tct = make_shared>(cbf); + + bool ret = false; + // Add columns to rows in virtFile. + util::Timer virtFileT; + virtFileT.start(); + // UberJobs only get one attempt + int resultJobId = makeJobIdAttempt(uberJob->getJobId(), 0); + ProtoRowBuffer::Ptr pRowBuffer = std::make_shared( + responseData, resultJobId, _jobIdColName, _jobIdSqlType, _jobIdMysqlType); + std::string const virtFile = _infileMgr.prepareSrc(pRowBuffer); + std::string const infileStatement = sql::formLoadInfile(_mergeTable, virtFile); + virtFileT.stop(); + + // If the job attempt is invalid, exit without adding rows. + // It will wait here if rows need to be deleted. 
+ if (_invalidJobAttemptMgr.incrConcurrentMergeCount(resultJobId)) { + return true; + } + + size_t const resultSize = responseData.transmitsize(); + size_t tResultSize; + { + std::lock_guard resultSzLock(_mtxResultSizeMtx); + _perJobResultSize[uJobId] += resultSize; + tResultSize = _totalResultSize + _perJobResultSize[uJobId]; + } + if (tResultSize > _maxResultTableSizeBytes) { + std::ostringstream os; + os << queryIdJobStr << " cancelling the query, queryResult table " << _mergeTable + << " is too large at " << tResultSize << " bytes, max allowed size is " << _maxResultTableSizeBytes + << " bytes"; + LOGS(_log, LOG_LVL_ERROR, os.str()); + _error = util::Error(-1, os.str(), -1); + return false; + } + + tct->addToValue(resultSize); + tct->setSuccess(); + tct.reset(); // stop transmit receive timer before merging happens. + + qdisp::CzarStats::get()->addTotalBytesRecv(resultSize); + qdisp::CzarStats::get()->addTotalRowsRecv(responseData.rowcount()); + + // Stop here (if requested) after collecting stats on the amount of data collected + // from workers. 
+ if (_config.debugNoMerge) { + return true; + } + + auto start = std::chrono::system_clock::now(); + switch (_dbEngine) { + case MYISAM: + ret = _applyMysqlMyIsam(infileStatement, resultSize); + break; + case INNODB: // Fallthrough + case MEMORY: + ret = _applyMysqlInnoDb(infileStatement, resultSize); + break; + default: + throw std::invalid_argument("InfileMerger::_dbEngine is unknown =" + engineToStr(_dbEngine)); + } + auto end = std::chrono::system_clock::now(); + auto mergeDur = std::chrono::duration_cast(end - start); + LOGS(_log, LOG_LVL_DEBUG, + "mergeDur=" << mergeDur.count() << " sema(total=" << _semaMgrConn->getTotalCount() + << " used=" << _semaMgrConn->getUsedCount() << ")"); + if (not ret) { + LOGS(_log, LOG_LVL_ERROR, "InfileMerger::merge mysql applyMysql failure"); + } + _invalidJobAttemptMgr.decrConcurrentMergeCount(); + + LOGS(_log, LOG_LVL_DEBUG, "virtFileT=" << virtFileT.getElapsed() << " mergeDur=" << mergeDur.count()); + + return ret; +} + bool InfileMerger::_applyMysqlMyIsam(std::string const& query, size_t resultSize) { std::unique_lock lock(_mysqlMutex); for (int j = 0; !_mysqlConn.connected(); ++j) { diff --git a/src/rproc/InfileMerger.h b/src/rproc/InfileMerger.h index 116aabaf1c..d8e472c54b 100644 --- a/src/rproc/InfileMerger.h +++ b/src/rproc/InfileMerger.h @@ -54,8 +54,11 @@ class ResponseSummary; } // namespace proto namespace qdisp { class JobQuery; -class MessageStore; +class UberJob; } // namespace qdisp +namespace QMeta { +class MessageStore; +} namespace qproc { class DatabaseModels; } @@ -165,6 +168,9 @@ class InfileMerger { bool merge(proto::ResponseSummary const& responseSummary, proto::ResponseData const& responseData, std::shared_ptr const& jq); + /// Merge the result data collected over Http. + bool mergeHttp(std::shared_ptr const& uberJob, proto::ResponseData const& responseData); + /// Indicate the merge for the job is complete. 
void mergeCompleteFor(int jobId); @@ -276,7 +282,7 @@ class InfileMerger { std::mutex _queryIdStrMtx; ///< protects _queryIdStr std::atomic _queryIdStrSet{false}; - std::string _queryIdStr{"QI=?"}; ///< Unknown until results start coming back from workers. + std::string _queryIdStr{"QID=?"}; ///< Unknown until results start coming back from workers. std::string _jobIdColName; ///< Name of the jobId column in the result table. int const _jobIdMysqlType{MYSQL_TYPE_LONG}; ///< 4 byte integer. diff --git a/src/wbase/CMakeLists.txt b/src/wbase/CMakeLists.txt index b47024647f..ae1fd984a8 100644 --- a/src/wbase/CMakeLists.txt +++ b/src/wbase/CMakeLists.txt @@ -6,6 +6,7 @@ target_sources(wbase PRIVATE FileChannelShared.cc SendChannel.cc Task.cc + UberJobData.cc UserQueryInfo.cc WorkerCommand.cc ) diff --git a/src/wbase/FileChannelShared.cc b/src/wbase/FileChannelShared.cc index 42a8814822..722d4ea0c6 100644 --- a/src/wbase/FileChannelShared.cc +++ b/src/wbase/FileChannelShared.cc @@ -37,6 +37,7 @@ #include "proto/ProtoHeaderWrap.h" #include "proto/worker.pb.h" #include "wbase/Task.h" +#include "wbase/UberJobData.h" #include "wconfig/WorkerConfig.h" #include "wpublish/QueriesAndChunks.h" #include "util/Bug.h" @@ -267,8 +268,12 @@ shared_ptr FileChannelShared::create(shared_ptr const& sendChannel, qmeta::CzarId czarId, string const& workerId) - : _sendChannel(sendChannel), + : _isUberJob(false), + _sendChannel(sendChannel), + _uberJobId(0), _czarId(czarId), + _czarHostName(""), ///< Name of the czar host. 
+ _czarPort(-1), _workerId(workerId), _protobufArena(make_unique()), _scsId(scsSeqId++) { @@ -278,7 +283,32 @@ FileChannelShared::FileChannelShared(shared_ptr const& sendC } } +FileChannelShared::Ptr FileChannelShared::create(std::shared_ptr const& uberJob, + qmeta::CzarId czarId, string const& czarHostName, + int czarPort, string const& workerId) { + lock_guard const lock(_resultsDirCleanupMtx); + return Ptr(new FileChannelShared(uberJob, czarId, czarHostName, czarPort, workerId)); +} + +FileChannelShared::FileChannelShared(std::shared_ptr const& uberJobData, + qmeta::CzarId czarId, string const& czarHostName, int czarPort, + string const& workerId) + : _isUberJob(true), + _sendChannel(nullptr), + _uberJobData(uberJobData), + _uberJobId(uberJobData->getUberJobId()), + _czarId(czarId), + _czarHostName(czarHostName), + _czarPort(czarPort), + _workerId(workerId), + _protobufArena(make_unique()), + _scsId(scsSeqId++), + _useHttp(true) { + LOGS(_log, LOG_LVL_DEBUG, "FileChannelShared created scsId=" << _scsId << " ujId=" << _uberJobId); +} + FileChannelShared::~FileChannelShared() { + LOGS(_log, LOG_LVL_DEBUG, "~FileChannelShared scsId=" << _scsId << " ujId=" << _uberJobId); // Normally, the channel should not be dead at this time. If it's already // dead it means there was a problem to process a query or send back a response // to Czar. 
In either case, the file would be useless and it has to be deleted @@ -286,13 +316,15 @@ FileChannelShared::~FileChannelShared() { if (isDead()) { _removeFile(lock_guard(_tMtx)); } - if (_sendChannel != nullptr) { - _sendChannel->setDestroying(); - if (!_sendChannel->isDead()) { - _sendChannel->kill("~FileChannelShared()"); + if (!_useHttp) { + if (_sendChannel != nullptr) { + _sendChannel->setDestroying(); + if (!_sendChannel->isDead()) { + _sendChannel->kill("~FileChannelShared()"); + } } } - LOGS(_log, LOG_LVL_DEBUG, "FileChannelShared deleted"); + LOGS(_log, LOG_LVL_DEBUG, "~FileChannelShared end"); } void FileChannelShared::setTaskCount(int taskCount) { _taskCount = taskCount; } @@ -310,8 +342,12 @@ bool FileChannelShared::kill(string const& note) { } bool FileChannelShared::isDead() { - if (_sendChannel == nullptr) return true; - return _sendChannel->isDead(); + if (!_useHttp) { + if (_sendChannel == nullptr) return true; + return _sendChannel->isDead(); + } else { + return _dead; + } } string FileChannelShared::makeIdStr(int qId, int jId) { @@ -322,11 +358,24 @@ string FileChannelShared::makeIdStr(int qId, int jId) { bool FileChannelShared::buildAndTransmitError(util::MultiError& multiErr, shared_ptr const& task, bool cancelled) { lock_guard const tMtxLock(_tMtx); - if (!_sendResponse(tMtxLock, task, cancelled, multiErr)) { - LOGS(_log, LOG_LVL_ERROR, "Could not transmit the error message to Czar."); - return false; + if (!_useHttp) { + if (!_sendResponse(tMtxLock, task, cancelled, multiErr)) { + LOGS(_log, LOG_LVL_ERROR, "Could not transmit the error message to Czar."); + return false; + } + return true; + } else { + auto ujData = _uberJobData.lock(); + if (ujData == nullptr) { + LOGS(_log, LOG_LVL_WARN, + __func__ << " not sending error as ujData is null " << multiErr.toString()); + return false; + } + // Delete the result file as nobody will come looking for it. 
+ _kill(tMtxLock, " buildAndTransmitError"); + return ujData->responseError(multiErr, task, cancelled); } - return true; + return false; } bool FileChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, shared_ptr const& task, @@ -429,7 +478,16 @@ bool FileChannelShared::buildAndTransmitResult(MYSQL_RES* mResult, shared_ptr const& streamMutexLock, string const& note) { LOGS(_log, LOG_LVL_DEBUG, "FileChannelShared::" << __func__ << " " << note); - return _sendChannel->kill(note); + if (!_useHttp) { + return _sendChannel->kill(note); + } else { + bool oldVal = _dead.exchange(true); + if (!oldVal) { + LOGS(_log, LOG_LVL_WARN, "FileChannelShared first kill call " << note); + } + _removeFile(streamMutexLock); + return oldVal; + } } bool FileChannelShared::_writeToFile(lock_guard const& tMtxLock, shared_ptr const& task, @@ -446,6 +504,7 @@ bool FileChannelShared::_writeToFile(lock_guard const& tMtxLock, shared_p LOGS(_log, LOG_LVL_TRACE, __func__ << " _fillRows " << task->getIdStr() << " end"); _responseData->set_rowcount(rows); _responseData->set_transmitsize(tSize); + ++_headerCount; // Serialize the content of the data buffer into the Protobuf data message // that will be written into the output file. 
@@ -509,16 +568,22 @@ bool FileChannelShared::_fillRows(lock_guard const& tMtxLock, MYSQL_RES* } void FileChannelShared::_removeFile(lock_guard const& tMtxLock) { - if (!_fileName.empty() && _file.is_open()) { - _file.close(); + LOGS(_log, LOG_LVL_TRACE, "FileChannelShared::_removeFile " << _fileName << " scsId=" << _scsId); + if (!_fileName.empty()) { + if (_file.is_open()) { + _file.close(); + } boost::system::error_code ec; + LOGS(_log, LOG_LVL_DEBUG, "FileChannelShared::" << __func__ << " removing " << _fileName); fs::remove_all(fs::path(_fileName), ec); if (ec.value() != 0) { LOGS(_log, LOG_LVL_WARN, "FileChannelShared::" << __func__ << " failed to remove the result file '" << _fileName << "', ec: " << ec << "."); + return; } } + _fileName.clear(); } bool FileChannelShared::_sendResponse(lock_guard const& tMtxLock, shared_ptr const& task, @@ -548,51 +613,60 @@ bool FileChannelShared::_sendResponse(lock_guard const& tMtxLock, shared_ // Prepare the response object and serialize in into a message that will // be sent to Czar. 
+ if (!_useHttp) { + proto::ResponseSummary response; + response.set_wname(_workerId); + response.set_queryid(queryId); + response.set_jobid(jobId); + response.set_fileresource_xroot(task->resultFileXrootUrl()); + response.set_fileresource_http(task->resultFileHttpUrl()); + response.set_attemptcount(task->getAttemptCount()); + response.set_rowcount(_rowcount); + response.set_transmitsize(_transmitsize); + string errorMsg; + int errorCode = 0; + if (!multiErr.empty()) { + errorMsg = multiErr.toOneLineString(); + errorCode = multiErr.firstErrorCode(); + } else if (cancelled) { + errorMsg = "cancelled"; + errorCode = -1; + } + if (!errorMsg.empty() or (errorCode != 0)) { + errorMsg = "FileChannelShared::" + string(__func__) + " error(s) in result for chunk #" + + to_string(task->getChunkId()) + ": " + errorMsg; + response.set_errormsg(errorMsg); + response.set_errorcode(errorCode); + LOGS(_log, LOG_LVL_ERROR, errorMsg); + } + response.SerializeToString(&_responseBuf); - proto::ResponseSummary response; - response.set_wname(_workerId); - response.set_queryid(queryId); - response.set_jobid(jobId); - response.set_fileresource_xroot(task->resultFileXrootUrl()); - response.set_fileresource_http(task->resultFileHttpUrl()); - response.set_attemptcount(task->getAttemptCount()); - response.set_rowcount(_rowcount); - response.set_transmitsize(_transmitsize); - string errorMsg; - int errorCode = 0; - if (!multiErr.empty()) { - errorMsg = multiErr.toOneLineString(); - errorCode = multiErr.firstErrorCode(); - } else if (cancelled) { - errorMsg = "cancelled"; - errorCode = -1; - } - if (!errorMsg.empty() or (errorCode != 0)) { - errorMsg = "FileChannelShared::" + string(__func__) + " error(s) in result for chunk #" + - to_string(task->getChunkId()) + ": " + errorMsg; - response.set_errormsg(errorMsg); - response.set_errorcode(errorCode); - LOGS(_log, LOG_LVL_ERROR, errorMsg); - } - response.SerializeToString(&_responseBuf); - - LOGS(_log, LOG_LVL_DEBUG, - __func__ << " idStr=" << 
idStr << ", _responseBuf.size()=" << _responseBuf.size()); + LOGS(_log, LOG_LVL_DEBUG, + __func__ << " idStr=" << idStr << ", _responseBuf.size()=" << _responseBuf.size()); - // Send the message sent out-of-band within the SSI metadata. - if (!_sendChannel->setMetadata(_responseBuf.data(), _responseBuf.size())) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " failed in setMetadata " << idStr); - _kill(streamMutexLock, "setMetadata"); - return false; - } + // Send the message sent out-of-band within the SSI metadata. + if (!_sendChannel->setMetadata(_responseBuf.data(), _responseBuf.size())) { + LOGS(_log, LOG_LVL_ERROR, __func__ << " failed in setMetadata " << idStr); + _kill(streamMutexLock, "setMetadata"); + return false; + } - // Send back the empty object since no info is expected by a caller - // for this type of requests beyond the usual error notifications (if any). - // Note that this call is needed to initiate the transaction. - if (!_sendChannel->sendData((char const*)0, 0)) { - LOGS(_log, LOG_LVL_ERROR, __func__ << " failed in sendData " << idStr); - _kill(streamMutexLock, "sendData"); - return false; + // Send back the empty object since no info is expected by a caller + // for this type of requests beyond the usual error notifications (if any). + // Note that this call is needed to initiate the transaction. 
+ if (!_sendChannel->sendData((char const*)0, 0)) { + LOGS(_log, LOG_LVL_ERROR, __func__ << " failed in sendData " << idStr); + _kill(streamMutexLock, "sendData"); + return false; + } + } else { + auto ujData = _uberJobData.lock(); + if (ujData == nullptr) { + LOGS(_log, LOG_LVL_WARN, __func__ << " uberJobData is nullptr for ujId=" << _uberJobId); + return false; + } + string httpFileUrl = task->resultFileHttpUrl(); + ujData->responseFileReady(httpFileUrl, _rowcount, _transmitsize, _headerCount); } return true; } diff --git a/src/wbase/FileChannelShared.h b/src/wbase/FileChannelShared.h index 0febe6f460..102f87fe24 100644 --- a/src/wbase/FileChannelShared.h +++ b/src/wbase/FileChannelShared.h @@ -57,6 +57,7 @@ class MultiError; } // namespace lsst::qserv::util namespace lsst::qserv::wbase { +class UberJobData; /// The class is responsible for writing mysql result rows as Protobuf /// serialized messages into an output file. Once a task (or all sub-chunk @@ -119,6 +120,11 @@ class FileChannelShared { static Ptr create(std::shared_ptr const& sendChannel, qmeta::CzarId czarId, std::string const& workerId = std::string()); + /// The factory method for handling UberJob over http. + static Ptr create(std::shared_ptr const& uberJob, qmeta::CzarId czarId, + std::string const& czarHostName, int czarPort, + std::string const& workerId); // TODO:UJ delete all params except uberJob + FileChannelShared() = delete; FileChannelShared(FileChannelShared const&) = delete; FileChannelShared& operator=(FileChannelShared const&) = delete; @@ -166,10 +172,14 @@ class FileChannelShared { bool isDead(); private: - /// Private constructor to protect shared pointer integrity. + /// TODO:UJ delete sendchannel version of constructor when possible. FileChannelShared(std::shared_ptr const& sendChannel, qmeta::CzarId czarId, std::string const& workerId); + /// Private constructor to protect shared pointer integrity. 
+ FileChannelShared(std::shared_ptr const& uberJob, qmeta::CzarId czarId, + std::string const& czarHostName, int czarPort, std::string const& workerId); + /// @see wbase::SendChannel::kill /// @param streamMutexLock - Lock on mutex _streamMutex to be acquired before calling the method. bool _kill(std::lock_guard const& streamMutexLock, std::string const& note); @@ -230,9 +240,16 @@ class FileChannelShared { mutable std::mutex _tMtx; ///< Protects data recording and Czar notification + bool _isUberJob; ///< true if this is using UberJob http. To be removed when _sendChannel goes away. + std::shared_ptr const _sendChannel; ///< Used to pass encoded information to XrdSsi. - qmeta::CzarId const _czarId; ///< id of the czar that requested this task(s). - std::string const _workerId; ///< The unique identifier of the worker. + std::weak_ptr _uberJobData; ///< Pointer to UberJobData + + UberJobId const _uberJobId; ///< The UberJobId + qmeta::CzarId const _czarId; ///< id of the czar that requested this task(s). TODO:UJ delete + std::string const _czarHostName; ///< Name of the czar host. TODO:UJ delete + int const _czarPort; ///< port for the czar. TODO:UJ delete + std::string const _workerId; ///< The unique identifier of the worker. TODO:UJ delete // Allocatons/deletion of the data messages are managed by Google Protobuf Arena. std::unique_ptr _protobufArena; @@ -272,6 +289,10 @@ class FileChannelShared { uint32_t _rowcount = 0; ///< The total numnber of rows in all result sets of a query. uint64_t _transmitsize = 0; ///< The total amount of data (bytes) in all result sets of a query. + uint64_t _headerCount = 0; ///< Count of headers received. + + bool const _useHttp = false; ///< to be eliminated when xrootd is no longer used. + std::atomic _dead{false}; ///< Set to true when the contents of the file are no longer useful. 
}; } // namespace lsst::qserv::wbase diff --git a/src/wbase/Task.cc b/src/wbase/Task.cc index 5687e2ddfb..0448a6af77 100644 --- a/src/wbase/Task.cc +++ b/src/wbase/Task.cc @@ -46,6 +46,7 @@ #include "global/constants.h" #include "global/LogContext.h" #include "global/UnsupportedError.h" +#include "http/RequestBodyJSON.h" #include "mysql/MySqlConfig.h" #include "proto/worker.pb.h" #include "util/Bug.h" @@ -55,6 +56,7 @@ #include "util/TimeUtils.h" #include "wbase/Base.h" #include "wbase/FileChannelShared.h" +#include "wbase/UberJobData.h" #include "wbase/UserQueryInfo.h" #include "wconfig/WorkerConfig.h" #include "wdb/QueryRunner.h" @@ -62,6 +64,7 @@ using namespace std; using namespace std::chrono_literals; +using namespace nlohmann; namespace fs = boost::filesystem; namespace { @@ -78,6 +81,17 @@ string buildResultFilePath(shared_ptr const& taskMs return path.string(); } +string buildUjResultFilePath(lsst::qserv::wbase::UberJobData::Ptr const& ujData, + string const& resultsDirname) { + if (resultsDirname.empty()) return resultsDirname; + fs::path path(resultsDirname); + // UberJobs have multiple chunks which can each have different attempt numbers. + // However, each CzarID + UberJobId should be unique as UberJobs are not retried. + path /= to_string(ujData->getCzarId()) + "-" + to_string(ujData->getQueryId()) + "-" + + to_string(ujData->getUberJobId()) + "-0" + ".proto"; + return path.string(); +} + size_t const MB_SIZE_BYTES = 1024 * 1024; } // namespace @@ -101,7 +115,6 @@ bool Task::ChunkIdGreater::operator()(Task::Ptr const& x, Task::Ptr const& y) { } string const Task::defaultUser = "qsmaster"; -IdSet Task::allIds{}; TaskScheduler::TaskScheduler() { auto hour = chrono::milliseconds(1h); @@ -132,7 +145,6 @@ Task::Task(TaskMsgPtr const& t, int fragmentNumber, shared_ptr co _attemptCount(t->attemptcount()), _queryFragmentNum(fragmentNumber), _fragmentHasSubchunks(t->fragment(fragmentNumber).has_subchunks()), - _hasDb(t->has_db()), _db(t->has_db() ? 
t->db() : ""), _czarId(t->has_czarid() ? t->czarid() : -1) { // These attributes will be passed back to Czar in the Protobuf response @@ -158,11 +170,6 @@ Task::Task(TaskMsgPtr const& t, int fragmentNumber, shared_ptr co user = defaultUser; } - allIds.add(to_string(_qId) + "_" + to_string(_jId)); - LOGS(_log, LOG_LVL_DEBUG, - "Task(...) " - << "this=" << this << " : " << allIds); - // Determine which major tables this task will use. int const size = t->scantable_size(); for (int j = 0; j < size; ++j) { @@ -216,10 +223,81 @@ Task::Task(TaskMsgPtr const& t, int fragmentNumber, shared_ptr co } } -Task::~Task() { - allIds.remove(to_string(_qId) + "_" + to_string(_jId)); - LOGS(_log, LOG_LVL_TRACE, "~Task() : " << allIds); +/// When the constructor is called, there is not enough information +/// available to define the action to take when this task is run, so +/// Command::setFunc() is used set the action later. This is why +/// the util::CommandThreadPool is not called here. +Task::Task(UberJobData::Ptr const& ujData, int jobId, int attemptCount, int chunkId, int fragmentNumber, + shared_ptr const& userQueryInfo, size_t templateId, bool hasSubchunks, + int subchunkId, string const& db, proto::ScanInfo const& scanInfo, bool scanInteractive, + int maxTableSize, vector const& fragSubTables, vector const& fragSubchunkIds, + shared_ptr const& sc, uint16_t resultsHttpPort) + : _userQueryInfo(userQueryInfo), + _sendChannel(sc), + _tSeq(++taskSequence), + _qId(ujData->getQueryId()), + _templateId(templateId), + _hasChunkId((chunkId >= 0)), + _chunkId(chunkId), + _subchunkId(subchunkId), + _jId(jobId), + _attemptCount(attemptCount), + _queryFragmentNum(fragmentNumber), + _fragmentHasSubchunks(hasSubchunks), + _db(db), + _czarId(ujData->getCzarId()), + _scanInfo(scanInfo), + _scanInteractive(scanInteractive), + _maxTableSize(maxTableSize * ::MB_SIZE_BYTES) { + // These attributes will be passed back to Czar in the Protobuf response + // to advice which result delivery channel to 
use. + auto const workerConfig = wconfig::WorkerConfig::instance(); + auto const resultDeliveryProtocol = workerConfig->resultDeliveryProtocol(); + _resultFilePath = ::buildUjResultFilePath(ujData, workerConfig->resultsDirname()); + auto const fqdn = util::get_current_host_fqdn(); + if (resultDeliveryProtocol == wconfig::ConfigValResultDeliveryProtocol::HTTP) { + // TODO:UJ it seems like this should just be part of the FileChannelShared??? + _resultFileHttpUrl = "http://" + fqdn + ":" + to_string(resultsHttpPort) + _resultFilePath; + } else { + throw runtime_error("wbase::Task::Task: unsupported results delivery protocol: " + + wconfig::ConfigValResultDeliveryProtocol::toString(resultDeliveryProtocol)); + } + user = defaultUser; + + // Create sets and vectors for 'acquiring' subchunk temporary tables. + // Fill in _dbTblsAndSubchunks + DbTableSet dbTbls_; + IntVector subchunksVect_; + if (!_fragmentHasSubchunks) { + /// FUTURE: Why acquire anything if there are no subchunks in the fragment? + /// This branch never seems to happen, but this needs to be proven beyond any doubt. + for (auto const& scanTbl : scanInfo.infoTables) { + dbTbls_.emplace(scanTbl.db, scanTbl.table); + LOGS(_log, LOG_LVL_INFO, + "Task::Task scanTbl.db=" << scanTbl.db << " scanTbl.table=" << scanTbl.table); + } + LOGS(_log, LOG_LVL_INFO, + "fragment a db=" << _db << ":" << _chunkId << " dbTbls=" << util::printable(dbTbls_)); + } else { + for (TaskDbTbl const& fDbTbl : fragSubTables) { + /// Different subchunk fragments can require different tables. + /// FUTURE: It may save space to store these in UserQueryInfo as it seems + /// database and table names are consistent across chunks. 
/// Build all Tasks for one chunk of an UberJob from its JSON job descriptions.
///
/// For every job in `jsJobs` this parses the job description (see
/// qproc::TaskMsgFactory::makeMsgJson for the producer side), then for every
/// query fragment and every sub-query creates one Task per subchunk (or a
/// single Task with subchunkId == -1 when the fragment has no subchunks).
/// Finally a QueryRunner is attached to each Task.
///
/// @throws TaskException if a job's queryId does not match `ujData`'s queryId.
/// @return vector of all Tasks created for this chunk.
///
/// NOTE(review): the extraction that produced this text stripped angle-bracket
/// template arguments; the `<...>` arguments below (vector element types and
/// `required<T>` value types) were reconstructed from usage — confirm against
/// the repository before relying on them.
std::vector<Task::Ptr> Task::createTasksForChunk(
        std::shared_ptr<UberJobData> const& ujData, nlohmann::json const& jsJobs,
        std::shared_ptr<wbase::FileChannelShared> const& sendChannel, proto::ScanInfo const& scanInfo,
        bool scanInteractive, int maxTableSizeMb,
        std::shared_ptr<wdb::ChunkResourceMgr> const& chunkResourceMgr, mysql::MySqlConfig const& mySqlConfig,
        std::shared_ptr<wcontrol::SqlConnMgr> const& sqlConnMgr,
        std::shared_ptr<wpublish::QueriesAndChunks> const& queriesAndChunks, uint16_t resultsHttpPort) {
    QueryId qId = ujData->getQueryId();
    UberJobId ujId = ujData->getUberJobId();

    // One UserQueryInfo entry per user query; created on first use.
    UserQueryInfo::Ptr userQueryInfo = UserQueryInfo::uqMapInsert(qId);

    string funcN(__func__);
    funcN += " QID=" + to_string(qId) + " ";

    vector<Task::Ptr> vect;  // All Tasks created for this chunk.
    for (auto const& job : jsJobs) {
        json const& jsJobDesc = job["jobdesc"];
        http::RequestBodyJSON rbJobDesc(jsJobDesc);
        // See qproc::TaskMsgFactory::makeMsgJson for message construction.
        auto const jdCzarId = rbJobDesc.required<int>("czarId");
        auto const jdQueryId = rbJobDesc.required<QueryId>("queryId");
        // Every job in the UberJob must belong to the same user query.
        if (jdQueryId != qId) {
            throw TaskException(ERR_LOC, string("ujId=") + to_string(ujId) + " qId=" + to_string(qId) +
                                                 " QueryId mismatch Job qId=" + to_string(jdQueryId));
        }
        auto const jdJobId = rbJobDesc.required<int>("jobId");
        auto const jdAttemptCount = rbJobDesc.required<int>("attemptCount");
        auto const jdQuerySpecDb = rbJobDesc.required<string>("querySpecDb");
        auto const jdScanPriority = rbJobDesc.required<int>("scanPriority");
        auto const jdScanInteractive = rbJobDesc.required<bool>("scanInteractive");
        auto const jdMaxTableSizeMb = rbJobDesc.required<int>("maxTableSize");
        auto const jdChunkId = rbJobDesc.required<int>("chunkId");
        LOGS(_log, LOG_LVL_TRACE,
             funcN << " jd cid=" << jdCzarId << " jdQId=" << jdQueryId << " jdJobId=" << jdJobId
                   << " jdAtt=" << jdAttemptCount << " jdQDb=" << jdQuerySpecDb
                   << " jdScanPri=" << jdScanPriority << " interactive=" << jdScanInteractive
                   << " maxTblSz=" << jdMaxTableSizeMb << " chunkId=" << jdChunkId);

        auto const jdQueryFragments = rbJobDesc.required<json>("queryFragments");
        int fragmentNumber = 0;
        for (auto const& frag : jdQueryFragments) {
            vector<string> fragSubQueries;    // SQL templates for this fragment.
            vector<int> fragSubchunkIds;      // Subchunk ids this fragment runs over.
            vector<TaskDbTbl> fragSubTables;  // db/table pairs needing subchunk temp tables.
            LOGS(_log, LOG_LVL_DEBUG, funcN << " frag=" << frag);
            http::RequestBodyJSON rbFrag(frag);
            auto const& jsQueries = rbFrag.required<json>("queries");
            // TODO:UJ move to uberjob???, these should be the same for all jobs
            for (auto const& subQ : jsQueries) {
                http::RequestBodyJSON rbSubQ(subQ);
                auto const subQuery = rbSubQ.required<string>("subQuery");
                LOGS(_log, LOG_LVL_DEBUG, funcN << " subQuery=" << subQuery);
                fragSubQueries.push_back(subQuery);
            }
            // NOTE(review): resultTable is parsed but not used below — presumably
            // kept for schema validation; confirm before removing.
            auto const& resultTable = rbFrag.required<string>("resultTable");
            auto const& jsSubIds = rbFrag.required<json>("subchunkIds");
            for (auto const& scId : jsSubIds) {
                fragSubchunkIds.push_back(scId);
            }
            auto const& jsSubTables = rbFrag.required<json>("subchunkTables");

            for (auto const& scDbTable : jsSubTables) {  // TODO:UJ are these the same for all jobs?
                http::RequestBodyJSON rbScDbTable(scDbTable);
                string scDb = rbScDbTable.required<string>("scDb");
                string scTable = rbScDbTable.required<string>("scTable");
                TaskDbTbl scDbTbl(scDb, scTable);
                fragSubTables.push_back(scDbTbl);
            }

            // One Task per (sub-query x subchunk); a fragment with no subchunks
            // yields a single Task with subchunkId == -1.
            for (string const& fragSubQ : fragSubQueries) {
                size_t templateId = userQueryInfo->addTemplate(fragSubQ);
                if (fragSubchunkIds.empty()) {
                    bool const noSubchunks = false;
                    int const subchunkId = -1;
                    auto task = Task::Ptr(new Task(
                            ujData, jdJobId, jdAttemptCount, jdChunkId, fragmentNumber, userQueryInfo,
                            templateId, noSubchunks, subchunkId, jdQuerySpecDb, scanInfo, scanInteractive,
                            maxTableSizeMb, fragSubTables, fragSubchunkIds, sendChannel, resultsHttpPort));
                    vect.push_back(task);
                } else {
                    for (auto subchunkId : fragSubchunkIds) {
                        bool const hasSubchunks = true;
                        auto task = Task::Ptr(new Task(ujData, jdJobId, jdAttemptCount, jdChunkId,
                                                       fragmentNumber, userQueryInfo, templateId,
                                                       hasSubchunks, subchunkId, jdQuerySpecDb, scanInfo,
                                                       scanInteractive, maxTableSizeMb, fragSubTables,
                                                       fragSubchunkIds, sendChannel, resultsHttpPort));
                        vect.push_back(task);
                    }
                }
            }
            ++fragmentNumber;
        }
    }

    for (auto taskPtr : vect) {
        // newQueryRunner sets the `_taskQueryRunner` pointer in `task`.
        taskPtr->setTaskQueryRunner(wdb::QueryRunner::newQueryRunner(taskPtr, chunkResourceMgr, mySqlConfig,
                                                                     sqlConnMgr, queriesAndChunks));
    }
    return vect;
}
+ // Hopefully, many are the same for all tasks and can be moved to ujData and userQueryInfo. + // Candidates: scanInfo, maxTableSizeMb, FileChannelShared, resultsHttpPort. + // Unfortunately, this will be much easier if it is done after xrootd method is removed. + Task(std::shared_ptr const& ujData, int jobId, int attemptCount, int chunkId, + int fragmentNumber, std::shared_ptr const& userQueryInfo, size_t templateId, + bool hasSubchunks, int subchunkId, std::string const& db, proto::ScanInfo const& scanInfo, + bool scanInteractive, int maxTableSizeMb, std::vector const& fragSubTables, + std::vector const& fragSubchunkIds, std::shared_ptr const& sc, + uint16_t resultsHttpPort = 8080); + Task& operator=(const Task&) = delete; Task(const Task&) = delete; virtual ~Task(); @@ -172,6 +200,16 @@ class Task : public util::CommandForThreadPool { std::shared_ptr const& queriesAndChunks, uint16_t resultsHttpPort = 8080); + /// Read json to generate a vector of one or more task for a chunk. + static std::vector createTasksForChunk( + std::shared_ptr const& ujData, nlohmann::json const& jsJobs, + std::shared_ptr const& sendChannel, proto::ScanInfo const& scanInfo, + bool scanInteractive, int maxTableSizeMb, + std::shared_ptr const& chunkResourceMgr, + mysql::MySqlConfig const& mySqlConfig, std::shared_ptr const& sqlConnMgr, + std::shared_ptr const& queriesAndChunks, + uint16_t resultsHttpPort = 8080); + void setQueryStatistics(std::shared_ptr const& qC); std::shared_ptr getSendChannel() const { return _sendChannel; } @@ -206,7 +244,6 @@ class Task : public util::CommandForThreadPool { TaskState state() const { return _state; } std::string getQueryString() const; - int getQueryFragmentNum() { return _queryFragmentNum; } std::string const& resultFilePath() const { return _resultFilePath; } std::string const& resultFileXrootUrl() const { return _resultFileXrootUrl; } std::string const& resultFileHttpUrl() const { return _resultFileHttpUrl; } @@ -326,7 +363,6 @@ class Task : 
public util::CommandForThreadPool { int const _attemptCount = 0; ///< attemptCount from czar int const _queryFragmentNum; ///< The fragment number of the query in the task message. bool const _fragmentHasSubchunks; ///< True if the fragment in this query has subchunks. - bool const _hasDb; ///< true if db was in message from czar. std::string _db; ///< Task database int const _czarId; ///< czar Id from the task message. diff --git a/src/wbase/UberJobData.cc b/src/wbase/UberJobData.cc new file mode 100644 index 0000000000..d969b80b7f --- /dev/null +++ b/src/wbase/UberJobData.cc @@ -0,0 +1,180 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
// UberJobData member definitions: construction, channel binding, and the two
// worker->czar HTTP notifications (result-file-ready and error).
//
// NOTE(review): angle-bracket template arguments in this text were stripped by
// the extraction; the `<...>` arguments below were reconstructed from usage —
// confirm against the repository.

/// Capture the czar's identity/endpoint and this worker's id; `_idStr` is the
/// "QID=<qid>:ujId=<ujid>" tag prepended to log messages.
UberJobData::UberJobData(UberJobId uberJobId, std::string const& czarName, qmeta::CzarId czarId,
                         std::string czarHost, int czarPort, uint64_t queryId, std::string const& workerId,
                         std::shared_ptr<wcontrol::Foreman> const& foreman, std::string const& authKey)
        : _uberJobId(uberJobId),
          _czarName(czarName),
          _czarId(czarId),
          _czarHost(czarHost),
          _czarPort(czarPort),
          _queryId(queryId),
          _workerId(workerId),
          _authKey(authKey),
          _foreman(foreman),
          _idStr(string("QID=") + to_string(_queryId) + ":ujId=" + to_string(_uberJobId)) {}

/// Bind the FileChannelShared used to return results for this UberJob.
/// The channel may be set only once; re-binding a different channel is a Bug.
void UberJobData::setFileChannelShared(std::shared_ptr<FileChannelShared> const& fileChannelShared) {
    if (_fileChannelShared != nullptr && _fileChannelShared != fileChannelShared) {
        throw util::Bug(ERR_LOC, string(__func__) + " Trying to change _fileChannelShared");
    }
    _fileChannelShared = fileChannelShared;
}

/// POST "/queryjob-ready" to the czar announcing that the result file for this
/// UberJob is available at `httpFileUrl`. Retries up to maxTries times on
/// transport errors; a czar reply of success == 0 stops the retries (the czar
/// received and rejected the message). Failure is only logged — see the TODO.
void UberJobData::responseFileReady(string const& httpFileUrl, uint64_t rowCount, uint64_t fileSize,
                                    uint64_t headerCount) {
    string const funcN = cName(__func__);
    LOGS(_log, LOG_LVL_TRACE,
         funcN << " httpFileUrl=" << httpFileUrl << " rows=" << rowCount << " fSize=" << fileSize
               << " headerCount=" << headerCount);

    // Wire format read by the czar's "queryjob-ready" handler; "version" must
    // track http::MetaModule::version.
    json request = {{"version", http::MetaModule::version},
                    {"workerid", _foreman->chunkInventory()->id()},
                    {"auth_key", _authKey},
                    {"czar", _czarName},
                    {"czarid", _czarId},
                    {"queryid", _queryId},
                    {"uberjobid", _uberJobId},
                    {"fileUrl", httpFileUrl},
                    {"rowCount", rowCount},
                    {"fileSize", fileSize},
                    {"headerCount", headerCount}};

    auto const method = http::Method::POST;
    vector<string> const headers = {"Content-Type: application/json"};
    string const url = "http://" + _czarHost + ":" + to_string(_czarPort) + "/queryjob-ready";
    string const requestContext = "Worker: '" + http::method2string(method) + "' request to '" + url + "'";
    http::Client client(method, url, request.dump(), headers);

    int maxTries = 2;  // TODO:UJ set from config
    bool transmitSuccess = false;
    for (int j = 0; (!transmitSuccess && j < maxTries); ++j) {
        try {
            json const response = client.readAsJson();
            if (0 != response.at("success").get<int>()) {
                transmitSuccess = true;
            } else {
                LOGS(_log, LOG_LVL_WARN, funcN << "Transmit success == 0");
                j = maxTries;  /// There's no point in resending as the czar got the message and didn't like
                               /// it.
            }
        } catch (exception const& ex) {
            // Transport failure: log and retry (if attempts remain).
            LOGS(_log, LOG_LVL_WARN, funcN + " " + requestContext + " failed, ex: " + ex.what());
        }
    }

    if (!transmitSuccess) {
        LOGS(_log, LOG_LVL_ERROR,
             funcN << "TODO:UJ NEED CODE Let czar find out through polling worker status??? Just throw the "
                      "result away???");
    }
}

/// POST "/queryjob-error" to the czar reporting errors (or cancellation) for
/// this UberJob. The error code/message come from `multiErr` when non-empty,
/// otherwise from `cancelled`. Same retry policy as responseFileReady().
/// @return true if the czar acknowledged the message with success != 0.
bool UberJobData::responseError(util::MultiError& multiErr, std::shared_ptr<Task> const& task,
                                bool cancelled) {
    string const funcN = cName(__func__);
    LOGS(_log, LOG_LVL_INFO, funcN);
    string errorMsg;
    int errorCode = 0;
    if (!multiErr.empty()) {
        errorMsg = multiErr.toOneLineString();
        errorCode = multiErr.firstErrorCode();
    } else if (cancelled) {
        errorMsg = "cancelled";
        errorCode = -1;
    }
    if (!errorMsg.empty() or (errorCode != 0)) {
        errorMsg =
                funcN + " error(s) in result for chunk #" + to_string(task->getChunkId()) + ": " + errorMsg;
        LOGS(_log, LOG_LVL_ERROR, errorMsg);
    }

    // Wire format read by the czar's "queryjob-error" handler.
    json request = {{"version", http::MetaModule::version},
                    {"workerid", _foreman->chunkInventory()->id()},
                    {"auth_key", _authKey},
                    {"czar", _czarName},
                    {"czarid", _czarId},
                    {"queryid", _queryId},
                    {"uberjobid", _uberJobId},
                    {"errorCode", errorCode},
                    {"errorMsg", errorMsg}};

    auto const method = http::Method::POST;
    vector<string> const headers = {"Content-Type: application/json"};
    string const url = "http://" + _czarHost + ":" + to_string(_czarPort) + "/queryjob-error";
    string const requestContext = "Worker: '" + http::method2string(method) + "' request to '" + url + "'";
    http::Client client(method, url, request.dump(), headers);

    int maxTries = 2;  // TODO:UJ set from config
    bool transmitSuccess = false;
    for (int j = 0; !transmitSuccess && j < maxTries; ++j) {
        try {
            json const response = client.readAsJson();
            if (0 != response.at("success").get<int>()) {
                transmitSuccess = true;
            } else {
                LOGS(_log, LOG_LVL_WARN, funcN << " transmit success == 0");
                j = maxTries;  /// There's no point in resending as the czar got the message and didn't like
                               /// it.
            }
        } catch (exception const& ex) {
            LOGS(_log, LOG_LVL_WARN, funcN + " " + requestContext + " failed, ex: " + ex.what());
        }
    }
    return transmitSuccess;
}
class UberJobData {
public:
    using Ptr = std::shared_ptr<UberJobData>;

    UberJobData() = delete;
    UberJobData(UberJobData const&) = delete;

    /// Factory; construction is private so instances are always held by Ptr.
    /// @param czarHost/czarPort - endpoint used for worker->czar HTTP callbacks.
    /// @param authKey - key included in callback messages for authentication.
    static Ptr create(UberJobId uberJobId, std::string const& czarName, qmeta::CzarId czarId,
                      std::string const& czarHost, int czarPort, uint64_t queryId,
                      std::string const& workerId, std::shared_ptr<wcontrol::Foreman> const& foreman,
                      std::string const& authKey) {
        return Ptr(new UberJobData(uberJobId, czarName, czarId, czarHost, czarPort, queryId, workerId,
                                   foreman, authKey));
    }
    /// Set file channel for this UberJob (may only be set once; see .cc).
    void setFileChannelShared(std::shared_ptr<FileChannelShared> const& fileChannelShared);

    UberJobId getUberJobId() const { return _uberJobId; }
    qmeta::CzarId getCzarId() const { return _czarId; }
    std::string getCzarHost() const { return _czarHost; }
    int getCzarPort() const { return _czarPort; }
    uint64_t getQueryId() const { return _queryId; }
    std::string getWorkerId() const { return _workerId; }

    /// Add the tasks defined in the UberJob to this UberJobData object.
    void addTasks(std::vector<std::shared_ptr<Task>> const& tasks) {
        _ujTasks.insert(_ujTasks.end(), tasks.begin(), tasks.end());
    }

    /// Let the czar know the result is ready.
    void responseFileReady(std::string const& httpFileUrl, uint64_t rowCount, uint64_t fileSize,
                           uint64_t headerCount);  // TODO:UJ remove headerCount

    /// Let the Czar know there's been a problem.
    bool responseError(util::MultiError& multiErr, std::shared_ptr<Task> const& task, bool cancelled);

    /// "QID=<qid>:ujId=<ujid>" tag used in log messages.
    std::string getIdStr() const { return _idStr; }
    std::string cName(std::string const& funcName) { return "UberJobData::" + funcName + " " + getIdStr(); }

private:
    UberJobData(UberJobId uberJobId, std::string const& czarName, qmeta::CzarId czarId, std::string czarHost,
                int czarPort, uint64_t queryId, std::string const& workerId,
                std::shared_ptr<wcontrol::Foreman> const& foreman, std::string const& authKey);

    UberJobId const _uberJobId;
    std::string const _czarName;   // Name of the czar that sent the UberJob.
    qmeta::CzarId const _czarId;
    std::string const _czarHost;   // Host for worker->czar HTTP callbacks.
    int const _czarPort;           // Port for worker->czar HTTP callbacks.
    QueryId const _queryId;
    std::string const _workerId;   // Id of this worker (the UberJob's target).
    std::string const _authKey;    // Auth key echoed back in callback messages.

    std::shared_ptr<wcontrol::Foreman> const _foreman;

    std::vector<std::shared_ptr<Task>> _ujTasks;            // All Tasks for this UberJob.
    std::shared_ptr<FileChannelShared> _fileChannelShared;  // Result channel; set once.

    std::string const _idStr;  // Pre-built log tag, see getIdStr().
};
class UserQueryInfo { @@ -63,6 +65,9 @@ class UserQueryInfo { /// @throws Bug if id is out of range. std::string getTemplate(size_t id); + /// Add an UberJobData object to the UserQueryInfo. + void addUberJob(std::shared_ptr const& ujData); + private: static Map _uqMap; static std::mutex _uqMapMtx; ///< protects _uqMap @@ -74,6 +79,10 @@ class UserQueryInfo { /// to alter existing indexes into the vector. std::vector _templates; std::mutex _uqMtx; ///< protects _templates; + + /// Map of all UberJobData objects on this worker for this User Query. + std::map> _uberJobMap; + std::mutex _uberJobMapMtx; ///< protects _uberJobMap; }; } // namespace lsst::qserv::wbase diff --git a/src/wsched/ChunkTasksQueue.cc b/src/wsched/ChunkTasksQueue.cc index 5b1889ab8d..de2a09bbbc 100644 --- a/src/wsched/ChunkTasksQueue.cc +++ b/src/wsched/ChunkTasksQueue.cc @@ -37,21 +37,18 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.wsched.ChunkTasksQueue"); namespace lsst::qserv::wsched { -/// Queue a Task with other tasks on the same chunk. +/// Queue tasks from an uberjob. void ChunkTasksQueue::queueTask(std::vector const& tasks) { std::lock_guard lg(_mapMx); auto iter = _chunkMap.end(); + int prevChunkId = -1; // invalid chunkId number for (auto const& task : tasks) { int chunkId = task->getChunkId(); - if (iter != _chunkMap.end() && iter->first != chunkId) { - LOGS(_log, LOG_LVL_ERROR, - "All tasks grouped together must be on the same chunk." - << " chunkA=" << iter->first << " chunkB=" << chunkId); - throw util::Bug(ERR_LOC, "ChunkTasksQueue::queueTask mismatched chunkIds"); - } - /// If it's the first time through, or the chunkId is different than the previous one, then - /// find the correct ChunkTask. - if (iter == _chunkMap.end() || iter->first != chunkId) { + // If it's the first time through, or the chunkId is different than the previous one, then + // find the correct ChunkTask. 
UberJobs are constructed in a way that makes it likely + // that subchunks for the same chunk will be grouped together in `tasks`. + if (iter == _chunkMap.end() || prevChunkId != chunkId) { + prevChunkId = chunkId; iter = _chunkMap.find(chunkId); if (iter == _chunkMap.end()) { // Correct ChunkTask wasn't found, make a new one. diff --git a/src/wsched/ScanScheduler.cc b/src/wsched/ScanScheduler.cc index 103c6751c7..06a489c855 100644 --- a/src/wsched/ScanScheduler.cc +++ b/src/wsched/ScanScheduler.cc @@ -224,29 +224,29 @@ void ScanScheduler::queCmd(vector const& cmds) { int jid = 0; // Convert to a vector of tasks for (auto const& cmd : cmds) { - wbase::Task::Ptr t = dynamic_pointer_cast(cmd); - if (t == nullptr) { + wbase::Task::Ptr tsk = dynamic_pointer_cast(cmd); + if (tsk == nullptr) { throw util::Bug(ERR_LOC, getName() + " queCmd could not be converted to Task or was nullptr"); } if (first) { first = false; - qid = t->getQueryId(); - jid = t->getJobId(); + qid = tsk->getQueryId(); + jid = tsk->getJobId(); QSERV_LOGCONTEXT_QUERY_JOB(qid, jid); } else { - if (qid != t->getQueryId() || jid != t->getJobId()) { - LOGS(_log, LOG_LVL_ERROR, - " mismatch multiple query/job ids in single queCmd " - << " expected QID=" << qid << " got=" << t->getQueryId() - << " expected JID=" << jid << " got=" << t->getJobId()); + if (qid != tsk->getQueryId()) { + string eMsg("Mismatch multiple query/job ids in single queCmd "); + eMsg += " expected QID=" + to_string(qid) + " got=" + to_string(tsk->getQueryId()); + eMsg += " expected JID=" + to_string(qid) + " got=" + to_string(tsk->getJobId()); + LOGS(_log, LOG_LVL_ERROR, eMsg); // This could cause difficult to detect problems later on. 
- throw util::Bug(ERR_LOC, "Mismatch multiple query/job ids in single queCmd"); + throw util::Bug(ERR_LOC, eMsg); return; } } - t->setMemMan(_memMan); - tasks.push_back(t); - LOGS(_log, LOG_LVL_INFO, getName() << " queCmd " << t->getIdStr()); + tsk->setMemMan(_memMan); + tasks.push_back(tsk); + LOGS(_log, LOG_LVL_INFO, getName() << " queCmd " << tsk->getIdStr()); } // Queue the tasks { diff --git a/src/xrdsvc/CMakeLists.txt b/src/xrdsvc/CMakeLists.txt index 2babecab97..072fdd99f0 100644 --- a/src/xrdsvc/CMakeLists.txt +++ b/src/xrdsvc/CMakeLists.txt @@ -6,6 +6,7 @@ target_sources(qserv_xrdsvc PRIVATE HttpModule.cc HttpMonitorModule.cc HttpReplicaMgtModule.cc + HttpWorkerCzarModule.cc HttpSvc.cc SsiProvider.cc SsiRequest.cc diff --git a/src/xrdsvc/HttpReplicaMgtModule.cc b/src/xrdsvc/HttpReplicaMgtModule.cc index afa81d74c8..14fdde32af 100644 --- a/src/xrdsvc/HttpReplicaMgtModule.cc +++ b/src/xrdsvc/HttpReplicaMgtModule.cc @@ -28,6 +28,7 @@ #include // Third party headers +#include "lsst/log/Log.h" #include "XrdSsi/XrdSsiCluster.hh" // Qserv headers @@ -48,6 +49,10 @@ extern XrdSsiProvider* XrdSsiProviderLookup; using namespace std; using json = nlohmann::json; +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdsvc.HttpReplicaMgt"); +} + namespace { // These markers if reported in the extended error response object of the failed // requests could be used by a caller for refining the completion status @@ -80,7 +85,6 @@ HttpReplicaMgtModule::HttpReplicaMgtModule(string const& context, json HttpReplicaMgtModule::executeImpl(string const& subModuleName) { string const func = string(__func__) + "[sub-module='" + subModuleName + "']"; - debug(func); enforceInstanceId(func, wconfig::WorkerConfig::instance()->replicationInstanceId()); enforceWorkerId(func); if (subModuleName == "GET") diff --git a/src/xrdsvc/HttpSvc.cc b/src/xrdsvc/HttpSvc.cc index f30c82a0c1..49781fc24e 100644 --- a/src/xrdsvc/HttpSvc.cc +++ b/src/xrdsvc/HttpSvc.cc @@ -33,6 +33,7 @@ #include 
"wpublish/ChunkInventory.h" #include "xrdsvc/HttpMonitorModule.h" #include "xrdsvc/HttpReplicaMgtModule.h" +#include "xrdsvc/HttpWorkerCzarModule.h" // LSST headers #include "lsst/log/Log.h" @@ -134,6 +135,12 @@ uint16_t HttpSvc::start() { HttpReplicaMgtModule::process(::serviceName, self->_foreman, req, resp, "REBUILD", http::AuthType::REQUIRED); }}}); + _httpServerPtr->addHandlers( + {{"POST", "/queryjob", + [self](shared_ptr const& req, shared_ptr const& resp) { + HttpWorkerCzarModule::process(::serviceName, self->_foreman, req, resp, "QUERYJOB", + http::AuthType::REQUIRED); + }}}); _httpServerPtr->start(); // Initialize the I/O context and start the service threads. At this point diff --git a/src/xrdsvc/HttpWorkerCzarModule.cc b/src/xrdsvc/HttpWorkerCzarModule.cc new file mode 100644 index 0000000000..af6f741daf --- /dev/null +++ b/src/xrdsvc/HttpWorkerCzarModule.cc @@ -0,0 +1,213 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ + +// Class header +#include "xrdsvc/HttpWorkerCzarModule.h" + +// System headers +#include +#include +#include + +// Third party headers +#include "lsst/log/Log.h" + +// Qserv headers +#include "http/Client.h" // TODO:UJ will probably need to be removed +#include "http/Exceptions.h" +#include "http/MetaModule.h" +#include "http/RequestBodyJSON.h" +#include "http/RequestQuery.h" +#include "mysql/MySqlUtils.h" +#include "qmeta/types.h" +#include "util/String.h" +#include "util/Timer.h" +#include "wbase/FileChannelShared.h" +#include "wbase/Task.h" +#include "wbase/UberJobData.h" +#include "wbase/UserQueryInfo.h" +#include "wconfig/WorkerConfig.h" +#include "wcontrol/Foreman.h" +#include "wcontrol/ResourceMonitor.h" +#include "wpublish/ChunkInventory.h" +#include "xrdsvc/SsiProvider.h" +#include "xrdsvc/XrdName.h" + +using namespace std; +using json = nlohmann::json; + +namespace { +LOG_LOGGER _log = LOG_GET("lsst.qserv.xrdsvc.HttpReplicaMgt"); +} + +namespace { +// These markers if reported in the extended error response object of the failed +// requests could be used by a caller for refining the completion status +// of the corresponding Controller-side operation. +// TODO:UJ Are these errors seem useful enought to be centralized ??? 
+json const extErrorInvalidParam = json::object({{"invalid_param", 1}}); +json const extErrorReplicaInUse = json::object({{"in_use", 1}}); + +} // namespace + +namespace lsst::qserv::xrdsvc { + +void HttpWorkerCzarModule::process(string const& context, shared_ptr const& foreman, + shared_ptr const& req, + shared_ptr const& resp, string const& subModuleName, + http::AuthType const authType) { + HttpWorkerCzarModule module(context, foreman, req, resp); + module.execute(subModuleName, authType); +} + +HttpWorkerCzarModule::HttpWorkerCzarModule(string const& context, + shared_ptr const& foreman, + shared_ptr const& req, + shared_ptr const& resp) + : HttpModule(context, foreman, req, resp) {} + +json HttpWorkerCzarModule::executeImpl(string const& subModuleName) { + string const func = string(__func__) + "[sub-module='" + subModuleName + "']"; + enforceInstanceId(func, wconfig::WorkerConfig::instance()->replicationInstanceId()); + enforceWorkerId(func); + if (subModuleName == "QUERYJOB") return _queryJob(); + throw invalid_argument(context() + func + " unsupported sub-module"); +} + +json HttpWorkerCzarModule::_queryJob() { + debug(__func__); + checkApiVersion(__func__, 34); + // At this point, API version, correct worker, and auth have been checked. + json jsRet = _handleQueryJob(__func__); + return jsRet; +} + +json HttpWorkerCzarModule::_handleQueryJob(string const& func) { + json jsRet; + vector ujTasks; + try { + // See qdisp::UberJob::runUberJob() for json message construction. 
+ auto const& jsReq = body().objJson; + string const targetWorkerId = body().required("worker"); + + http::RequestBodyJSON rbCzar(body().required("czar")); + auto czarName = rbCzar.required("name"); + auto czarId = rbCzar.required("id"); + auto czarPort = rbCzar.required("management-port"); + auto czarHostName = rbCzar.required("management-host-name"); + LOGS(_log, LOG_LVL_TRACE, + __func__ << " czar n=" << czarName << " id=" << czarId << " p=" << czarPort + << " h=" << czarHostName); + + http::RequestBodyJSON rbUberJob(body().required("uberjob")); + auto ujQueryId = rbUberJob.required("queryid"); + auto ujId = rbUberJob.required("uberjobid"); + auto ujCzarId = rbUberJob.required("czarid"); + auto ujJobs = rbUberJob.required("jobs"); + LOGS(_log, LOG_LVL_TRACE, + __func__ << " uj qid=" << ujQueryId << " ujid=" << ujId << " czid=" << ujCzarId); + + auto ujData = wbase::UberJobData::create(ujId, czarName, czarId, czarHostName, czarPort, ujQueryId, + targetWorkerId, foreman(), authKey()); + + // Find the entry for this queryId, creat a new one if needed. + wbase::UserQueryInfo::Ptr userQueryInfo = wbase::UserQueryInfo::uqMapInsert(ujQueryId); + userQueryInfo->addUberJob(ujData); + + auto channelShared = + wbase::FileChannelShared::create(ujData, czarId, czarHostName, czarPort, targetWorkerId); + ujData->setFileChannelShared(channelShared); + + // TODO:UJ These items should be stored higher in the message structure as they get + // duplicated and should always be the same within an UberJob. + QueryId jdQueryId = 0; + proto::ScanInfo scanInfo; + bool scanInfoSet = false; + bool jdScanInteractive = false; + int jdMaxTableSize = 0; + + for (auto const& job : ujJobs) { + json const& jsJobDesc = job["jobdesc"]; + http::RequestBodyJSON rbJobDesc(jsJobDesc); + // See qproc::TaskMsgFactory::makeMsgJson for message construction. 
+ auto const jdCzarId = rbJobDesc.required("czarId"); + jdQueryId = rbJobDesc.required("queryId"); + auto const jdJobId = rbJobDesc.required("jobId"); + auto const jdAttemptCount = rbJobDesc.required("attemptCount"); + auto const jdQuerySpecDb = rbJobDesc.required("querySpecDb"); + auto const jdScanPriority = rbJobDesc.required("scanPriority"); + jdScanInteractive = rbJobDesc.required("scanInteractive"); + jdMaxTableSize = rbJobDesc.required("maxTableSize"); + auto const jdChunkId = rbJobDesc.required("chunkId"); + LOGS(_log, LOG_LVL_TRACE, + __func__ << " jd cid=" << jdCzarId << " jdQId=" << jdQueryId << " jdJobId=" << jdJobId + << " jdAtt=" << jdAttemptCount << " jdQDb=" << jdQuerySpecDb + << " jdScanPri=" << jdScanPriority << " interactive=" << jdScanInteractive + << " maxTblSz=" << jdMaxTableSize << " chunkId=" << jdChunkId); + + auto const jdChunkScanTables = rbJobDesc.required("chunkScanTables"); + if (!scanInfoSet) { + for (auto const& tbl : jdChunkScanTables) { + http::RequestBodyJSON rbTbl(tbl); + auto const& chunkScanDb = rbTbl.required("db"); + auto lockInMemory = rbTbl.required("lockInMemory"); + auto const& chunkScanTable = rbTbl.required("table"); + auto tblScanRating = rbTbl.required("tblScanRating"); + LOGS(_log, LOG_LVL_TRACE, + __func__ << " chunkSDb=" << chunkScanDb << " lockinmem=" << lockInMemory + << " csTble=" << chunkScanTable << " tblScanRating=" << tblScanRating); + scanInfo.infoTables.emplace_back(chunkScanDb, chunkScanTable, lockInMemory, + tblScanRating); + scanInfoSet = true; + } + } + scanInfo.scanRating = jdScanPriority; + } + + // create tasks and add them to ujData + auto chunkTasks = wbase::Task::createTasksForChunk( + ujData, ujJobs, channelShared, scanInfo, jdScanInteractive, jdMaxTableSize, + foreman()->chunkResourceMgr(), foreman()->mySqlConfig(), foreman()->sqlConnMgr(), + foreman()->queriesAndChunks(), foreman()->httpPort()); + ujTasks.insert(ujTasks.end(), chunkTasks.begin(), chunkTasks.end()); + + 
channelShared->setTaskCount(ujTasks.size()); + ujData->addTasks(ujTasks); + + util::Timer timer; + timer.start(); + foreman()->processTasks(ujTasks); // Queues tasks to be run later. + timer.stop(); + LOGS(_log, LOG_LVL_DEBUG, + __func__ << " Enqueued UberJob time=" << timer.getElapsed() << " " << jsReq); + + string note = string("qId=") + to_string(ujQueryId) + " ujId=" + to_string(ujId) + + " tasks in uberJob=" + to_string(channelShared->getTaskCount()); + jsRet = {{"success", 1}, {"errortype", "none"}, {"note", note}}; + + } catch (wbase::TaskException const& texp) { + LOGS(_log, LOG_LVL_ERROR, "wbase::TaskException received " << texp.what()); + jsRet = {{"success", 0}, {"errortype", "parse"}, {"note", texp.what()}}; + } + return jsRet; +} + +} // namespace lsst::qserv::xrdsvc diff --git a/src/xrdsvc/HttpWorkerCzarModule.h b/src/xrdsvc/HttpWorkerCzarModule.h new file mode 100644 index 0000000000..bb75a63c55 --- /dev/null +++ b/src/xrdsvc/HttpWorkerCzarModule.h @@ -0,0 +1,88 @@ +/* + * LSST Data Management System + * + * This product includes software developed by the + * LSST Project (http://www.lsst.org/). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the LSST License Statement and + * the GNU General Public License along with this program. If not, + * see . 
+ */ +#ifndef LSST_QSERV_XRDSVC_HTTPWORKERCZARMODULE_H +#define LSST_QSERV_XRDSVC_HTTPWORKERCZARMODULE_H + +// System headers +#include +#include +#include +#include + +// Third party headers +#include "nlohmann/json.hpp" + +// Qserv headers +#include "qmeta/types.h" +#include "xrdsvc/HttpModule.h" + +namespace lsst::qserv::qhttp { +class Request; +class Response; +} // namespace lsst::qserv::qhttp + +namespace lsst::qserv::wcontrol { +class Foreman; +} // namespace lsst::qserv::wcontrol + +namespace lsst::qserv::xrdsvc { +class SsiProviderServer; +} // namespace lsst::qserv::xrdsvc + +// This header declarations +namespace lsst::qserv::xrdsvc { + +/// This class handles Http message from the czar to the worker. +class HttpWorkerCzarModule : public xrdsvc::HttpModule { +public: + /// @note supported values for parameter 'subModuleName' are: + /// 'QUERYJOB' - Convert an UberJob message into Tasks and a send channel. + /// @throws std::invalid_argument for unknown values of parameter 'subModuleName' + static void process(std::string const& context, std::shared_ptr const& foreman, + std::shared_ptr const& req, + std::shared_ptr const& resp, std::string const& subModuleName, + http::AuthType const authType = http::AuthType::NONE); + + HttpWorkerCzarModule() = delete; + HttpWorkerCzarModule(HttpWorkerCzarModule const&) = delete; + HttpWorkerCzarModule& operator=(HttpWorkerCzarModule const&) = delete; + + ~HttpWorkerCzarModule() final = default; + +protected: + virtual nlohmann::json executeImpl(std::string const& subModuleName) final; + +private: + HttpWorkerCzarModule(std::string const& context, std::shared_ptr const& foreman, + std::shared_ptr const& req, + std::shared_ptr const& resp); + + /// Handle an UberJob message from the czar to run it on this worker by calling _handleQueryJob. 
+ nlohmann::json _queryJob(); + + /// Handle an UberJob message from the czar to run it on this worker, this does + /// work of deciphering the message, creating UberJobData objects and Task objects. + nlohmann::json _handleQueryJob(std::string const& func); +}; + +} // namespace lsst::qserv::xrdsvc + +#endif // LSST_QSERV_XRDSVC_HTTPWORKERCZARMODULE_H