From c139c799b1a6b5c8eb5a959524aa9ff0d55df124 Mon Sep 17 00:00:00 2001 From: Shamser Ahmed Date: Thu, 23 Jan 2025 16:54:52 +0000 Subject: [PATCH 1/3] HPCC-32901 Make super file cost and reads/write updates consistent - Super file's numDiskReads, numDiskWrites, readCost and writeCost stats are re-calculated from subfiles stats every time a subfile is added or removed from a super file - Super file's readCost and numReads are updated every time the superfile is read Signed-off-by: Shamser Ahmed --- dali/base/dadfs.cpp | 151 ++++++++++++++++++++-- dali/base/dadfs.hpp | 14 +- ecl/hthor/hthor.cpp | 1 + esp/clients/ws_dfsclient/ws_dfsclient.cpp | 17 ++- thorlcr/graph/thgraphmaster.cpp | 6 +- 5 files changed, 176 insertions(+), 13 deletions(-) diff --git a/dali/base/dadfs.cpp b/dali/base/dadfs.cpp index 734db18b27b..fb6fa19fdb9 100644 --- a/dali/base/dadfs.cpp +++ b/dali/base/dadfs.cpp @@ -5069,7 +5069,57 @@ protected: friend class CDistributedFilePart; atRestCost = calcFileAtRestCost(cluster, sizeGB, fileAgeDays); } if (attrs) - accessCost = getReadCost(*attrs, cluster) + getWriteCost(*attrs, cluster); + accessCost = ::getReadCost(*attrs, cluster) + ::getWriteCost(*attrs, cluster); + } + + virtual bool getNumReads(stat_type &numReads) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))) + numReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads)); + else + numReads = 0; + return true; + } + + virtual bool getNumWrites(stat_type &numWrites) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites))) + numWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites)); + else + numWrites = 0; + return true; + } + + virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const override + { + IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && (attrs->hasProp(getDFUQResultFieldName(DFUQRFreadCost)) ||calculateIfMissing)) + { + StringBuffer clusterName; + // getClusterName isn't const function, so need to cast this to non-const + const_cast(this)->getClusterName(0, clusterName); + cost = ::getReadCost(*attrs, clusterName.str()); + return true; + } + cost = 0; + return false; + } + + virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && (attrs->hasProp(getDFUQResultFieldName(DFUQRFwriteCost)) || calculateIfMissing)) + { + StringBuffer clusterName; + // getClusterName isn't const function, so need to cast this to non-const + const_cast(this)->getClusterName(0, clusterName); + cost = ::getWriteCost(*attrs, clusterName.str()); + return true; + } + cost = 0; + return false; } }; @@ -6336,6 +6386,82 @@ class CDistributedSuperFile: public CDistributedFileBase return true; } + virtual bool getNumReads(stat_type &numReads) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))) + numReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads)); + else + { + numReads = 0; + ForEachItemIn(i,subfiles) + { + stat_type c; + if (!subfiles.item(i).getNumReads(c)) + return false; + numReads += c; + } + } + return true; + } + + virtual bool getNumWrites(stat_type &numWrites) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites))) + numWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites)); + else + { + numWrites = 0; + ForEachItemIn(i,subfiles) + { + stat_type c; + if (!subfiles.item(i).getNumWrites(c)) + return false; + numWrites += c; + } + } + return true; + } + + virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFreadCost))) + cost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost)); + else + { + cost = 0; + ForEachItemIn(i,subfiles) + { + cost_type c; + if (!subfiles.item(i).getReadCost(c,calculateIfMissing)) + return false; + cost += c; + } + } + return true; + } + + virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const override + { + const IPropertyTree *attrs = root->queryPropTree("Attr"); + if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFwriteCost))) + cost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost)); + else + { + cost = 0; + ForEachItemIn(i,subfiles) + { + cost_type c; + if (!subfiles.item(i).getWriteCost(c,calculateIfMissing)) + return false; + cost += c; + } + } + return true; + } + virtual IDistributedSuperFile *querySuperFile() override { return this; @@ -6562,6 +6688,10 @@ class CDistributedSuperFile: public CDistributedFileBase attrs.removeProp("@minSkew"); attrs.removeProp("@maxSkewPart"); attrs.removeProp("@minSkewPart"); + attrs.removeProp(getDFUQResultFieldName(DFUQRFnumDiskReads)); + attrs.removeProp(getDFUQResultFieldName(DFUQRFnumDiskWrites)); + attrs.removeProp(getDFUQResultFieldName(DFUQRFreadCost)); + attrs.removeProp(getDFUQResultFieldName(DFUQRFwriteCost)); __int64 fs = getFileSize(false,false); if (fs!=-1) @@ -6589,6 +6719,16 @@ class CDistributedSuperFile: public CDistributedFileBase attrs.setPropBin("_record_layout", mb.length(), mb.bufferBase()); if (getRecordLayout(mb, "_rtlType")) attrs.setPropBin("_rtlType", mb.length(), mb.bufferBase()); + stat_type numReads, numWrites; + if (getNumReads(numReads)) + attrs.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), numReads); + if (getNumWrites(numWrites)) + attrs.setPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), numWrites); + cost_type readCost, writeCost; + if (getReadCost(readCost)) + attrs.setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), numReads); + if (getWriteCost(writeCost)) + attrs.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), numWrites); const char *kind = nullptr; Owned subIter = getSubFileIterator(true); ForEach(*subIter) @@ -13407,16 +13547,7 @@ IDFProtectedIterator *CDistributedFileDirectory::lookupProtectedFiles(const char return new CDFProtectedIterator(owner,notsuper,superonly,defaultTimeout); } -const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner", - "@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles", - "@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed", - "@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart", - "@readCost", "@writeCost" }; -extern da_decl const char* getDFUQResultFieldName(DFUQResultField field) -{ - return DFUQResultFieldNames[field]; -} IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned numFiles, DFUQResultField* localFilters, const char* localFilterBuf) { diff --git a/dali/base/dadfs.hpp b/dali/base/dadfs.hpp index fb8453b291a..1d50d63cf32 100644 --- a/dali/base/dadfs.hpp +++ b/dali/base/dadfs.hpp @@ -304,8 +304,16 @@ enum DFUQResultField }; extern da_decl const char* getDFUQFilterFieldName(DFUQFilterField field); -extern da_decl const char* getDFUQResultFieldName(DFUQResultField field); +constexpr const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner", + "@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles", + "@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed", + "@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart", + "@readCost", "@writeCost" }; +extern da_decl constexpr const char* getDFUQResultFieldName(DFUQResultField field) +{ + return DFUQResultFieldNames[field]; +} /** * File operations can be included in a transaction to ensure that multiple * updates are handled atomically. This is the interface to a transaction @@ -434,6 +442,10 @@ interface IDistributedFile: extends IInterface virtual int getExpire(StringBuffer *expirationDate) = 0; virtual void setExpire(int expireDays) = 0; virtual void getCost(const char * cluster, cost_type & atRestCost, cost_type & accessCost) = 0; + virtual bool getNumReads(stat_type &numReads) const = 0; + virtual bool getNumWrites(stat_type &numWrites) const = 0; + virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const = 0; + virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const = 0; }; diff --git a/ecl/hthor/hthor.cpp b/ecl/hthor/hthor.cpp index 6dd02b640c7..30fb379f14f 100644 --- a/ecl/hthor/hthor.cpp +++ b/ecl/hthor/hthor.cpp @@ -8545,6 +8545,7 @@ void CHThorDiskReadBaseActivity::closepart() { IDistributedSuperFile *super = dFile->querySuperFile(); dFile = &(super->querySubFile(subfile, true)); + updateCostAndNumReads(super, curDiskReads); } } updateCostAndNumReads(dFile, curDiskReads); diff --git a/esp/clients/ws_dfsclient/ws_dfsclient.cpp b/esp/clients/ws_dfsclient/ws_dfsclient.cpp index 81a845665a1..ce7148fa91b 100644 --- a/esp/clients/ws_dfsclient/ws_dfsclient.cpp +++ b/esp/clients/ws_dfsclient/ws_dfsclient.cpp @@ -236,7 +236,22 @@ class CServiceDistributedFileBase : public CSimpleInterfaceOf virtual bool getSkewInfo(unsigned &maxSkew, unsigned &minSkew, unsigned &maxSkewPart, unsigned &minSkewPart, bool calculateIfMissing) override { return legacyDFSFile->getSkewInfo(maxSkew, minSkew, maxSkewPart, minSkewPart, calculateIfMissing); } virtual int getExpire(StringBuffer *expirationDate) override { return legacyDFSFile->getExpire(expirationDate); } virtual void getCost(const char * cluster, cost_type & atRestCost, cost_type & accessCost) override { legacyDFSFile->getCost(cluster, atRestCost, accessCost); } - + virtual bool getNumReads(stat_type &numReads) const override + { + return legacyDFSFile->getNumReads(numReads); + } + virtual bool getNumWrites(stat_type &numWrites) const override + { + return legacyDFSFile->getNumWrites(numWrites); + } + virtual bool getReadCost(cost_type &cost, bool calculateIfMissing) const override + { + return legacyDFSFile->getReadCost(cost, calculateIfMissing); + } + virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing) const override + { + return legacyDFSFile->getWriteCost(cost, calculateIfMissing); + } // setters (change file meta data) virtual void setPreferredClusters(const char *clusters) override { legacyDFSFile->setPreferredClusters(clusters); } virtual void setSingleClusterOnly() override { legacyDFSFile->setSingleClusterOnly(); } diff --git a/thorlcr/graph/thgraphmaster.cpp b/thorlcr/graph/thgraphmaster.cpp index 9dd7350afaa..cd04ffacc83 100644 --- a/thorlcr/graph/thgraphmaster.cpp +++ b/thorlcr/graph/thgraphmaster.cpp @@ -670,7 +670,7 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps) curReadCost = calcFileAccessCost(clusterName, 0, curDiskReads); if (updateFileProps) - updateCostAndNumReads(file, curDiskReads); + updateCostAndNumReads(file, curDiskReads, curReadCost); return curReadCost; }; cost_type readCost = 0; @@ -693,12 +693,16 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps) if (super) { unsigned numSubFiles = super->numSubFiles(true); + stat_type curDiskReads = 0; for (unsigned i=0; iquerySubFile(i, true); readCost += updateReadCosts(useJhtreeCache, &subFile, *fileStats[fileIndex]); + curDiskReads += fileStats[fileIndex]->getStatisticSum(StNumDiskReads); fileIndex++; } + if (updateFileProps) + updateCostAndNumReads(super, curDiskReads, readCost); } else { From bb1b26b0e09bdc686106dc2ae25fd2143bd9e662 Mon Sep 17 00:00:00 2001 From: Shamser Ahmed Date: Tue, 4 Feb 2025 16:35:34 +0000 Subject: [PATCH 2/3] Changes following review Signed-off-by: Shamser Ahmed --- dali/base/dadfs.cpp | 85 +++++++++++++++++++++++++-------------------- dali/base/dadfs.hpp | 81 ++++++++++++++++++------------------------ 2 files changed, 81 insertions(+), 85 deletions(-) diff --git a/dali/base/dadfs.cpp b/dali/base/dadfs.cpp index fb6fa19fdb9..fb52bb3f02a 100644 --- a/dali/base/dadfs.cpp +++ b/dali/base/dadfs.cpp @@ -5075,33 +5075,33 @@ protected: friend class CDistributedFilePart; virtual bool getNumReads(stat_type &numReads) const override { const IPropertyTree *attrs = root->queryPropTree("Attr"); - if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskReads))) - numReads = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads)); - else - numReads = 0; - return true; + numReads = attrs ? attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads)) : 0; + return numReads > 0; } virtual bool getNumWrites(stat_type &numWrites) const override { const IPropertyTree *attrs = root->queryPropTree("Attr"); - if (attrs && attrs->hasProp(getDFUQResultFieldName(DFUQRFnumDiskWrites))) - numWrites = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites)); - else - numWrites = 0; - return true; + numWrites = attrs ? attrs->getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites)) : 0; + return numWrites > 0; } virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const override { IPropertyTree *attrs = root->queryPropTree("Attr"); - if (attrs && (attrs->hasProp(getDFUQResultFieldName(DFUQRFreadCost)) ||calculateIfMissing)) + if (attrs) { - StringBuffer clusterName; - // getClusterName isn't const function, so need to cast this to non-const - const_cast(this)->getClusterName(0, clusterName); - cost = ::getReadCost(*attrs, clusterName.str()); - return true; + cost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFreadCost), (unsigned __int64)-1); + if ((unsigned __int64)-1 != cost) + return true; + if (calculateIfMissing) + { + StringBuffer clusterName; + // getClusterName isn't const function, so need to cast this to non-const + const_cast(this)->getClusterName(0, clusterName); + cost = calcReadCost(*attrs, clusterName.str()); + return true; + } } cost = 0; return false; @@ -5110,13 +5110,19 @@ protected: friend class CDistributedFilePart; virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const override { const IPropertyTree *attrs = root->queryPropTree("Attr"); - if (attrs && (attrs->hasProp(getDFUQResultFieldName(DFUQRFwriteCost)) || calculateIfMissing)) + if (attrs) { - StringBuffer clusterName; - // getClusterName isn't const function, so need to cast this to non-const - const_cast(this)->getClusterName(0, clusterName); - cost = ::getWriteCost(*attrs, clusterName.str()); - return true; + cost = attrs->getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), (unsigned __int64)-1); + if ((unsigned __int64)-1 != cost) + return true; + if (calculateIfMissing) + { + StringBuffer clusterName; + // getClusterName isn't const function, so need to cast this to non-const + const_cast(this)->getClusterName(0, clusterName); + cost = calcWriteCost(*attrs, clusterName.str()); + return true; + } } cost = 0; return false; @@ -6397,12 +6403,11 @@ class CDistributedSuperFile: public CDistributedFileBase ForEachItemIn(i,subfiles) { stat_type c; - if (!subfiles.item(i).getNumReads(c)) - return false; - numReads += c; + if (subfiles.item(i).getNumReads(c)) + numReads += c; } } - return true; + return numReads > 0; } virtual bool getNumWrites(stat_type &numWrites) const override @@ -6416,12 +6421,11 @@ class CDistributedSuperFile: public CDistributedFileBase ForEachItemIn(i,subfiles) { stat_type c; - if (!subfiles.item(i).getNumWrites(c)) - return false; - numWrites += c; + if (subfiles.item(i).getNumWrites(c)) + numWrites += c; } } - return true; + return numWrites > 0; } virtual bool getReadCost(cost_type &cost, bool calculateIfMissing=false) const override @@ -6435,12 +6439,11 @@ class CDistributedSuperFile: public CDistributedFileBase ForEachItemIn(i,subfiles) { cost_type c; - if (!subfiles.item(i).getReadCost(c,calculateIfMissing)) - return false; - cost += c; + if (subfiles.item(i).getReadCost(c,calculateIfMissing)) + cost += c; } } - return true; + return cost > 0; } virtual bool getWriteCost(cost_type &cost, bool calculateIfMissing=false) const override @@ -6455,11 +6458,10 @@ class CDistributedSuperFile: public CDistributedFileBase { cost_type c; if (!subfiles.item(i).getWriteCost(c,calculateIfMissing)) - return false; - cost += c; + cost += c; } } - return true; + return cost > 0; } virtual IDistributedSuperFile *querySuperFile() override @@ -13547,7 +13549,16 @@ IDFProtectedIterator *CDistributedFileDirectory::lookupProtectedFiles(const char return new CDFProtectedIterator(owner,notsuper,superonly,defaultTimeout); } +const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner", + "@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles", + "@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed", + "@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart", + "@readCost", "@writeCost" }; +extern da_decl const char* getDFUQResultFieldName(DFUQResultField field) +{ + return DFUQResultFieldNames[field]; +} IPropertyTreeIterator *deserializeFileAttrIterator(MemoryBuffer& mb, unsigned numFiles, DFUQResultField* localFilters, const char* localFilterBuf) { diff --git a/dali/base/dadfs.hpp b/dali/base/dadfs.hpp index 1d50d63cf32..985b2fb0c7e 100644 --- a/dali/base/dadfs.hpp +++ b/dali/base/dadfs.hpp @@ -304,16 +304,8 @@ enum DFUQResultField }; extern da_decl const char* getDFUQFilterFieldName(DFUQFilterField field); -constexpr const char* DFUQResultFieldNames[] = { "@name", "@description", "@group", "@kind", "@modified", "@job", "@owner", - "@DFUSFrecordCount", "@recordCount", "@recordSize", "@DFUSFsize", "@size", "@workunit", "@DFUSFcluster", "@numsubfiles", - "@accessed", "@numparts", "@compressedSize", "@directory", "@partmask", "@superowners", "@persistent", "@protect", "@compressed", - "@cost", "@numDiskReads", "@numDiskWrites", "@atRestCost", "@accessCost", "@maxSkew", "@minSkew", "@maxSkewPart", "@minSkewPart", - "@readCost", "@writeCost" }; +extern da_decl const char* getDFUQResultFieldName(DFUQResultField field); -extern da_decl constexpr const char* getDFUQResultFieldName(DFUQResultField field) -{ - return DFUQResultFieldNames[field]; -} /** * File operations can be included in a transaction to ensure that multiple * updates are handled atomically. This is the interface to a transaction @@ -908,6 +900,20 @@ constexpr bool defaultNonPrivilegedUser = false; extern da_decl void configurePreferredPlanes(); +template +inline cost_type calcReadCost(const IPropertyTree & fileAttr, Source source) +{ + // Calculate legacy read cost from numDiskReads + // (However, it is not possible to accurately calculate read cost for key + // files, as the reads may have been from page cache and not from disk.) + if (!isFileKey(fileAttr) && source) + { + stat_type numDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); + return calcFileAccessCost(source, 0, numDiskReads); + } + return 0; +} + // Get read cost from readCost field or calculate legacy read cost // - migrateLegacyCost: if true, update readCost field with legacy read cost template @@ -917,19 +923,11 @@ inline cost_type getReadCost(IPropertyTree & fileAttr, Source source, bool migra return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFreadCost), 0); else { - // Calculate legacy read cost from numDiskReads - // (However, it is not possible to accurately calculate read cost for key - // files, as the reads may have been from page cache and not from disk.) - if (!isFileKey(fileAttr) && source) - { - stat_type numDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); - cost_type readCost = calcFileAccessCost(source, 0, numDiskReads); - if (migrateLegacyCost) - fileAttr.setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), readCost); - return readCost; - } + cost_type readCost = calcReadCost(fileAttr, source); + if (migrateLegacyCost) + fileAttr.setPropInt64(getDFUQResultFieldName(DFUQRFreadCost), readCost); + return readCost; } - return 0; } // Get read cost from readCost field or calculate legacy read cost @@ -939,15 +937,16 @@ inline cost_type getReadCost(const IPropertyTree & fileAttr, Source source) if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFreadCost))) return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFreadCost), 0); else + return calcReadCost(fileAttr, source); +} + +template +inline cost_type calcWriteCost(const IPropertyTree & fileAttr, Source source) +{ + if (source) { - // Calculate legacy read cost from numDiskReads - // (However, it is not possible to accurately calculate read cost for key - // files, as the reads may have been from page cache and not from disk.) - if (!isFileKey(fileAttr) && source) - { - stat_type numDiskReads = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskReads), 0); - return calcFileAccessCost(source, 0, numDiskReads); - } + stat_type numDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0); + return calcFileAccessCost(source, numDiskWrites, 0); } return 0; } @@ -961,17 +960,11 @@ inline cost_type getWriteCost(IPropertyTree & fileAttr, Source source, bool migr return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), 0); else { - // Calculate legacy write cost from numDiskWrites - if (source) - { - stat_type numDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0); - cost_type writeCost = calcFileAccessCost(source, numDiskWrites, 0); - if (migrateLegacyCost) - fileAttr.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), writeCost); - return writeCost; - } + cost_type writeCost = calcWriteCost(fileAttr, source); + if (migrateLegacyCost) + fileAttr.setPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), writeCost); + return writeCost; } - return 0; } // Get write cost from writeCost field or calculate legacy write cost @@ -981,15 +974,7 @@ inline cost_type getWriteCost(const IPropertyTree & fileAttr, Source source) if (fileAttr.hasProp(getDFUQResultFieldName(DFUQRFwriteCost))) return fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFwriteCost), 0); else - { - // Calculate legacy write cost from numDiskWrites - if (source) - { - stat_type numDiskWrites = fileAttr.getPropInt64(getDFUQResultFieldName(DFUQRFnumDiskWrites), 0); - return calcFileAccessCost(source, numDiskWrites, 0); - } - } - return 0; + return calcWriteCost(fileAttr, source); } extern da_decl bool doesPhysicalMatchMeta(IPartDescriptor &partDesc, IFile &iFile, offset_t &expectedSize, offset_t &actualSize); From 1cdeb502eb6b2700cad160331c0d939bd3cfcbe2 Mon Sep 17 00:00:00 2001 From: Shamser Ahmed Date: Mon, 10 Feb 2025 13:18:03 +0000 Subject: [PATCH 3/3] HPCC-33334 Use cache stats to calculate all index read activities Signed-off-by: Shamser Ahmed --- thorlcr/graph/thgraphmaster.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/thorlcr/graph/thgraphmaster.cpp b/thorlcr/graph/thgraphmaster.cpp index cd04ffacc83..a4fa975f5d8 100644 --- a/thorlcr/graph/thgraphmaster.cpp +++ b/thorlcr/graph/thgraphmaster.cpp @@ -674,9 +674,13 @@ cost_type CMasterActivity::calcFileReadCostStats(bool updateFileProps) return curReadCost; }; cost_type readCost = 0; + ThorActivityKind actKind = container.getKind(); + bool isIndexActivity = (TAKindexread == actKind) || (TAKkeyedjoin == actKind) || (TAKindexnormalize == actKind) + || (TAKindexaggregate == actKind) || (TAKindexcount == actKind) || (TAKindexgroupaggregate == actKind) + || (TAKindexgroupexists == actKind) || (TAKindexgroupcount == actKind); + if (fileStats.size()>0) { - ThorActivityKind activityKind = container.getKind(); unsigned fileIndex = 0; diskAccessCost = 0; for (unsigned i=0; i