From 9a0018007b65041f8f68834433303cada1179d1d Mon Sep 17 00:00:00 2001 From: Jake Smith Date: Wed, 5 Jul 2023 12:38:43 +0100 Subject: [PATCH] HPCC-28555 Add caching, restrict to unfiltered, and simplify Signed-off-by: Jake Smith --- dali/base/dautils.cpp | 12 +--- ecl/hthor/hthorkey.cpp | 16 ++--- helm/hpcc/values.schema.json | 12 ++-- system/jlib/jfile.cpp | 66 ++++++++++++------- system/jlib/jfile.hpp | 8 ++- system/jlib/jptree.cpp | 2 +- .../activities/indexread/thindexreadslave.cpp | 17 ++--- 7 files changed, 70 insertions(+), 63 deletions(-) diff --git a/dali/base/dautils.cpp b/dali/base/dautils.cpp index e92876252b7..b5b4ca134c9 100644 --- a/dali/base/dautils.cpp +++ b/dali/base/dautils.cpp @@ -103,23 +103,17 @@ void getPlaneHosts(StringArray &hosts, IPropertyTree *plane) } } -constexpr const char * lz_plane_path = "storage/planes[@category='lz']"; - IPropertyTreeIterator * getDropZonePlanesIterator(const char * name) { - StringBuffer xpath(lz_plane_path); - if (!isEmptyString(name)) - xpath.appendf("[@name='%s']", name); - return getGlobalConfigSP()->getElements(xpath); + return getPlanesIterator("lz", name); } IPropertyTree * getDropZonePlane(const char * name) { if (isEmptyString(name)) throw makeStringException(-1, "Drop zone name required"); - StringBuffer xpath(lz_plane_path); - xpath.appendf("[@name='%s']", name); - return getGlobalConfigSP()->getPropTree(xpath); + Owned iter = getDropZonePlanesIterator(name); + return iter->first() ? &iter->get() : nullptr; } IPropertyTree * findPlane(const char *category, const char * path, const char * host, bool ipMatch, bool mustMatch) diff --git a/ecl/hthor/hthorkey.cpp b/ecl/hthor/hthorkey.cpp index 6ad3244bb89..9f5b46155ad 100644 --- a/ecl/hthor/hthorkey.cpp +++ b/ecl/hthor/hthorkey.cpp @@ -288,8 +288,6 @@ class CHThorIndexReadActivityBase : public CHThorActivityBase bool singlePart = false; // a single part index, not part of a super file - optimize so never reload the part. bool localSortKey = false; bool initializedFileInfo = false; - int configUseBlockedIndexIO = -1; // tri-state (-1 = not set) - size32_t configBlockedIndexIOSize = 0; //for layout translation Owned layoutTrans; @@ -326,10 +324,6 @@ CHThorIndexReadActivityBase::CHThorIndexReadActivityBase(IAgentContext &_agent, if (recordTranslationModeHintText) recordTranslationModeHint = getTranslationMode(recordTranslationModeHintText, true); } - // can be defined as runtime option, but normally defined at plane level - // see call to getBlockedIndexIOSize - configUseBlockedIndexIO = agent.queryWorkUnit()->getDebugValueInt("useBlockedIndexIO", -1); - configBlockedIndexIOSize = agent.queryWorkUnit()->getDebugValueInt("blockedIndexIOKB", 0) * 1024; } CHThorIndexReadActivityBase::~CHThorIndexReadActivityBase() @@ -601,9 +595,13 @@ void CHThorIndexReadActivityBase::killPart() bool CHThorIndexReadActivityBase::setCurrentPart(unsigned whichPart) { IDistributedFilePart &part = df->queryPart(whichPart); - StringBuffer planeName; - df->getClusterName(part.copyClusterNum(0), planeName); - size32_t blockedSize = getBlockedIndexIOSize(planeName, configUseBlockedIndexIO, configBlockedIndexIOSize); + size32_t blockedSize = 0; + if (!helper.hasSegmentMonitors()) // unfiltered + { + StringBuffer planeName; + df->getClusterName(part.copyClusterNum(0), planeName); + blockedSize = getBlockedFileIOSize(planeName); + } keyIndex.setown(openKeyFile(part, blockedSize)); if(df->numParts() == 1) verifyIndex(keyIndex); diff --git a/helm/hpcc/values.schema.json b/helm/hpcc/values.schema.json index 780a9b0fcf4..ec1cf9bdc0f 100644 --- a/helm/hpcc/values.schema.json +++ b/helm/hpcc/values.schema.json @@ -544,14 +544,10 @@ "waitForMount": { "type": "boolean" }, - "useBlockedIndexIO": { - "description": "Use blocked IO read for serial index reads", - "type": "boolean", - "default": false - }, - "blockedIndexIOKB": { - "description": "Block size (KB) to use for serial index reading when useBlockedIndexIO is on", - "type": "integer" + "blockedFileIOKB": { + "description": "Optimal block size for efficient reading from this plane. Implementations will use if they can", + "type": "integer", + "default": 0 }, "components": {}, "prefix": {}, diff --git a/system/jlib/jfile.cpp b/system/jlib/jfile.cpp index 4827a9433b8..17cdc321fbc 100644 --- a/system/jlib/jfile.cpp +++ b/system/jlib/jfile.cpp @@ -7568,6 +7568,16 @@ IPropertyTree * getRemoteStorage(const char * name) return global->getPropTree(xpath); } +IPropertyTreeIterator * getPlanesIterator(const char * category, const char *name) +{ + StringBuffer xpath("storage/planes"); + if (!isEmptyString(category)) + xpath.appendf("[@category='%s']", category); + if (!isEmptyString(name)) + xpath.appendf("[@name='%s']", name); + return getGlobalConfigSP()->getElements(xpath); +} + IAPICopyClient * createApiCopyClient(IStorageApiInfo * source, IStorageApiInfo * target) { ReadLockBlock block(containedFileHookLock); @@ -7639,34 +7649,44 @@ class CBlockedFileIO : public CSimpleInterfaceOf extern IFileIO *createBlockedIO(IFileIO *base, size32_t blockSize) { - PROGLOG("createBlockedIO(%u)", (unsigned)blockSize); + PROGLOG("createBlockedIO - blockSize = %u", blockSize); return new CBlockedFileIO(base, blockSize); } -static constexpr size32_t defaultBlockedIndexIOKB = 1024; -size32_t getBlockedIndexIOSize(const char *planeName, int configUseBlockedIndexIO, size32_t configBlockedIndeIOSize) +// Cache/update plane index blocked IO settings +static unsigned planeBlockIOMapCBId = 0; +static std::unordered_map planeBlockedIOMap; +static CriticalSection planeBlockedIOMapCrit; +MODULE_INIT(INIT_PRIORITY_STANDARD) { - size32_t blockedSize = 0; // off - if (0 != configUseBlockedIndexIO) + auto updateFunc = [&](const IPropertyTree *oldComponentConfiguration, const IPropertyTree *oldGlobalConfiguration) { - if (1 == configUseBlockedIndexIO) - blockedSize = configBlockedIndeIOSize; - if (0 == blockedSize) + CriticalBlock b(planeBlockedIOMapCrit); + planeBlockedIOMap.clear(); + Owned planesIter = getPlanesIterator(nullptr, nullptr); + ForEach(*planesIter) { - // could cache, but not sure it's worth it - int useBlockedIndexIO = configUseBlockedIndexIO; - Owned plane = getStoragePlane(planeName); - if (plane) - { - if ((1 == configUseBlockedIndexIO) || plane->getPropBool("@useBlockedIndexIO")) - { - useBlockedIndexIO = 1; - blockedSize = plane->getPropInt("@blockedIndexIOKB") * 1024; - } - } - if ((1 == useBlockedIndexIO) && (0 == blockedSize)) - blockedSize = defaultBlockedIndexIOKB * 1024; + const IPropertyTree &plane = planesIter->query(); + size32_t blockedFileIOSize = plane.getPropInt("@blockedFileIOKB") * 1024; + planeBlockedIOMap[plane.queryProp("@name")] = blockedFileIOSize; } - } - return blockedSize; + }; + planeBlockIOMapCBId = installConfigUpdateHook(updateFunc, true); + return true; +} + +MODULE_EXIT() +{ + removeConfigUpdateHook(planeBlockIOMapCBId); +} + + +size32_t getBlockedFileIOSize(const char *planeName, size32_t defaultSize) +{ + CriticalBlock b(planeBlockedIOMapCrit); + auto it = planeBlockedIOMap.find(planeName); + if (it != planeBlockedIOMap.end()) + return it->second; + else + return defaultSize; } diff --git a/system/jlib/jfile.hpp b/system/jlib/jfile.hpp index 59c822cedf5..f391982c4ce 100644 --- a/system/jlib/jfile.hpp +++ b/system/jlib/jfile.hpp @@ -774,12 +774,14 @@ jlib_decl IFileEventWatcher *createFileEventWatcher(FileWatchFunc callback); //---- Storage plane related functions ---------------------------------------------------- +interface IPropertyTree; +interface IPropertyTreeIterator; extern jlib_decl IPropertyTree * getHostGroup(const char * name, bool required); extern jlib_decl IPropertyTree * getStoragePlane(const char * name); extern jlib_decl IPropertyTree * getRemoteStorage(const char * name); +extern jlib_decl IPropertyTreeIterator * getPlanesIterator(const char * category, const char *name); -constexpr size32_t defaultBlockFileIOSize = 0x100000; // 1MB -extern jlib_decl IFileIO *createBlockedIO(IFileIO *base, size32_t blockSize=defaultBlockFileIOSize); -extern jlib_decl size32_t getBlockedIndexIOSize(const char *planeName, int configUseBlockedIndexIO, size32_t configBlockedIndeIOSize); +extern jlib_decl IFileIO *createBlockedIO(IFileIO *base, size32_t blockSize); +extern jlib_decl size32_t getBlockedFileIOSize(const char *planeName, size32_t defaultSize=0); #endif diff --git a/system/jlib/jptree.cpp b/system/jlib/jptree.cpp index b1f0dc01814..beafd59809c 100644 --- a/system/jlib/jptree.cpp +++ b/system/jlib/jptree.cpp @@ -8878,7 +8878,7 @@ void executeConfigUpdaterCallbacks() { if (!configFileUpdater) // NB: executeConfigUpdaterCallbacks should always be called after configFileUpdater is initialized return; - configFileUpdater->executeCallbacks(componentConfiguration.getLink(), globalConfiguration.getLink()); + configFileUpdater->executeCallbacks(componentConfiguration, globalConfiguration); } void CConfigUpdateHook::clear() diff --git a/thorlcr/activities/indexread/thindexreadslave.cpp b/thorlcr/activities/indexread/thindexreadslave.cpp index 241d92da014..6ee37516bcd 100644 --- a/thorlcr/activities/indexread/thindexreadslave.cpp +++ b/thorlcr/activities/indexread/thindexreadslave.cpp @@ -78,8 +78,6 @@ class CIndexReadSlaveBase : public CSlaveActivity Owned lazyIFileIO; mutable CriticalSection ioStatsCS; unsigned fileTableStart = NotFound; - int configUseBlockedIndexIO = -1; // tri-state (-1 = not set) - size32_t configBlockedIndexIOSize = 0; template class CCaptureIndexStats @@ -310,9 +308,13 @@ class CIndexReadSlaveBase : public CSlaveActivity // local key handling - StringBuffer planeName; - part.queryOwner().getClusterLabel(0, planeName); - size32_t blockedSize = getBlockedIndexIOSize(planeName, configUseBlockedIndexIO, configBlockedIndexIOSize); + size32_t blockedSize = 0; + if (!helper->hasSegmentMonitors()) // unfiltered + { + StringBuffer planeName; + part.queryOwner().getClusterLabel(0, planeName); + blockedSize = getBlockedFileIOSize(planeName); + } lazyIFileIO.setown(queryThor().queryFileCache().lookupIFileIO(*this, logicalFilename, part, nullptr, indexReadActivityStatistics, blockedSize)); RemoteFilename rfn; @@ -557,11 +559,6 @@ class CIndexReadSlaveBase : public CSlaveActivity serializer.set(queryRowSerializer()); helper->setCallback(&callback); reInit = 0 != (helper->getFlags() & (TIRvarfilename|TIRdynamicfilename)); - - // can be defined as runtime option, but normally defined at plane level - // see call to getBlockedIndexIOSize - configUseBlockedIndexIO = getOptInt("useBlockedIndexIO", -1); - configBlockedIndexIOSize = getOptInt("blockedIndexIOKB") * 1024; } rowcount_t getLocalCount(const rowcount_t keyedLimit, bool hard) {